Merge branch 'main' into feature-label
commit 7247467225

@@ -111,7 +111,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"cfg_aliases",
"chrono",

@@ -134,7 +134,7 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"arrow",
"bytes",

@@ -429,9 +429,9 @@ dependencies = [
[[package]]
name = "cast"
version = "0.2.3"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0"
checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75"
dependencies = [
"rustc_version",
]

@@ -488,9 +488,9 @@ dependencies = [
[[package]]
name = "clang-sys"
version = "1.1.1"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54d78e30b388d4815220c8dd03fea5656b6c6d32adb59e89061552a102f8da1"
checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c"
dependencies = [
"glob",
"libc",

@@ -599,6 +599,7 @@ dependencies = [
"clap",
"criterion-plot",
"csv",
"futures",
"itertools 0.10.0",
"lazy_static",
"num-traits",

@@ -611,6 +612,7 @@ dependencies = [
"serde_derive",
"serde_json",
"tinytemplate",
"tokio",
"walkdir",
]

@@ -662,9 +664,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
version = "0.5.0"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-utils",

@@ -787,7 +789,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"ahash 0.7.2",
"arrow",

@@ -1044,9 +1046,9 @@ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
[[package]]
name = "futures"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
dependencies = [
"futures-channel",
"futures-core",

@@ -1059,9 +1061,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
dependencies = [
"futures-core",
"futures-sink",

@@ -1069,15 +1071,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"

[[package]]
name = "futures-executor"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
dependencies = [
"futures-core",
"futures-task",

@@ -1086,15 +1088,15 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"

[[package]]
name = "futures-macro"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
dependencies = [
"proc-macro-hack",
"proc-macro2",

@@ -1104,21 +1106,21 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"

[[package]]
name = "futures-task"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"

[[package]]
name = "futures-test"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1fe5e51002528907757d5f1648101086f7197f792112db43ba23b06b09e6bce"
checksum = "e77baeade98824bc928c21b8ad39918b9d8a06745ebdb6e2c93fb7673fb7968d"
dependencies = [
"futures-core",
"futures-executor",

@@ -1132,9 +1134,9 @@ dependencies = [
[[package]]
name = "futures-util"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
dependencies = [
"futures-channel",
"futures-core",

@@ -1885,6 +1887,7 @@ dependencies = [
"influxdb_line_protocol",
"internal_types",
"observability_deps",
"parking_lot",
"snafu",
"string-interner",
"test_helpers",

@@ -2298,7 +2301,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow.git?rev=e69478a890b1e4eee49b540b69b2711d170a0433#e69478a890b1e4eee49b540b69b2711d170a0433"
source = "git+https://github.com/apache/arrow.git?rev=00a443629c00079ea03c0b9f415d74669d2759a7#00a443629c00079ea03c0b9f415d74669d2759a7"
dependencies = [
"arrow",
"base64 0.12.3",

@@ -2331,6 +2334,7 @@ dependencies = [
"bytes",
"data_types",
"futures",
"internal_types",
"object_store",
"parking_lot",
"snafu",

@@ -2646,6 +2650,7 @@ dependencies = [
"futures",
"influxdb_line_protocol",
"internal_types",
"libc",
"observability_deps",
"parking_lot",
"snafu",

@@ -2880,9 +2885,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.2"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf12057f289428dbf5c591c74bf10392e4a8003f993405a902f20117019022d4"
checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124"
dependencies = [
"base64 0.13.0",
"bytes",

@@ -3117,9 +3122,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "sct"
version = "0.6.0"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3042af939fca8c3453b7af0f1c66e533a15a86169e39de2657310ade8f98d3c"
checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce"
dependencies = [
"ring",
"untrusted",

@@ -3260,13 +3265,16 @@ dependencies = [
"bytes",
"chrono",
"crc32fast",
"criterion",
"data_types",
"flatbuffers",
"flate2",
"futures",
"generated_types",
"influxdb_line_protocol",
"internal_types",
"mutable_buffer",
"num_cpus",
"object_store",
"observability_deps",
"parking_lot",

@@ -3277,6 +3285,7 @@ dependencies = [
"serde_json",
"snafu",
"snap",
"tempfile",
"test_helpers",
"tokio",
"tokio-util",

@@ -3756,9 +3765,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.4.0"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722"
checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
dependencies = [
"autocfg",
"bytes",

@@ -3819,9 +3828,9 @@ dependencies = [
[[package]]
name = "tokio-util"
version = "0.6.5"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5143d049e85af7fbc36f5454d990e62c2df705b3589f123b71f441b6b59f443f"
checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e"
dependencies = [
"bytes",
"futures-core",

@@ -71,7 +71,7 @@ csv = "1.1"
dirs = "3.0.1"
dotenv = "0.15.0"
flate2 = "1.0"
futures = "0.3.1"
futures = "0.3"
http = "0.2.0"
hyper = "0.14"
once_cell = { version = "1.4.0", features = ["parking_lot"] }

@@ -8,14 +8,14 @@ description = "Apache Arrow / Parquet / DataFusion dependencies for InfluxDB IOx
[dependencies] # In alphabetical order
# We are using a development version of arrow/parquet/datafusion and the dependencies are at the same rev
# The version can be found here: https://github.com/apache/arrow/commit/e69478a890b1e4eee49b540b69b2711d170a0433
# The version can be found here: https://github.com/apache/arrow/commit/00a443629c00079ea03c0b9f415d74669d2759a7
#
arrow = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433" }
arrow = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7" , features = ["simd"] }
arrow-flight = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7" }

# Turn off optional datafusion features (function packages)
datafusion = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433", default-features = false }
datafusion = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7", default-features = false }

# Turn off the "arrow" feature; it currently has a bug that causes the crate to rebuild every time
# and we're not currently using it anyway
parquet = { git = "https://github.com/apache/arrow.git", rev = "e69478a890b1e4eee49b540b69b2711d170a0433", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }
parquet = { git = "https://github.com/apache/arrow.git", rev = "00a443629c00079ea03c0b9f415d74669d2759a7", default-features = false, features = ["snap", "brotli", "flate2", "lz4", "zstd"] }

@@ -29,6 +29,46 @@ macro_rules! assert_table_eq {
};
}

/// Compares formatted output of a record batch with an expected
/// vector of strings in a way that order does not matter.
/// This is a macro so errors appear on the correct line
///
/// Designed so that failure output can be directly copy/pasted
/// into the test code as expected results.
///
/// Expects to be called about like this:
///
/// `assert_batches_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])`
#[macro_export]
macro_rules! assert_batches_sorted_eq {
($EXPECTED_LINES: expr, $CHUNKS: expr) => {
let mut expected_lines: Vec<String> = $EXPECTED_LINES.iter().map(|&s| s.into()).collect();

// sort except for header + footer
let num_lines = expected_lines.len();
if num_lines > 3 {
expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
}

let formatted = arrow_deps::arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap();
// fix for windows: \r\n -->
let mut actual_lines: Vec<&str> = formatted.trim().lines().collect();

// sort except for header + footer
let num_lines = actual_lines.len();
if num_lines > 3 {
actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable()
}

assert_eq!(
expected_lines, actual_lines,
"\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
expected_lines, actual_lines
);
};
}

// sort a record batch by all columns (to provide a stable output order for test
// comparison)
pub fn sort_record_batch(batch: RecordBatch) -> RecordBatch {
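For reference, a minimal sketch of how the new `assert_batches_sorted_eq!` macro is intended to be used in a test. The `run_query` helper and the expected table below are illustrative only, not taken from this commit:

```rust
// Hypothetical test: `run_query` stands in for whatever produces the
// record batches under test.
#[test]
fn readings_match_regardless_of_row_order() {
    let batches: Vec<RecordBatch> = run_query("select * from cpu");

    let expected = vec![
        "+------+--------+-----+------+",
        "| host | region | val | time |",
        "+------+--------+-----+------+",
        "| a    | west   | 23  | 1    |",
        "| b    | east   | 22  | 2    |",
        "+------+--------+-----+------+",
    ];

    // Rows may come back in any order; only the header and the closing
    // border line keep their positions before the comparison.
    assert_batches_sorted_eq!(expected, &batches);
}
```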

@@ -21,8 +21,11 @@ pub enum ChunkStorage {
/// The chunk is in the Read Buffer (where it can not be mutated)
ReadBuffer,

/// The chunk is both in ReadBuffer and Object Store
ReadBufferAndObjectStore,

/// The chunk is stored in Object Storage (where it can not be mutated)
ObjectStore,
ObjectStoreOnly,
}

impl ChunkStorage {

@@ -32,7 +35,8 @@ impl ChunkStorage {
Self::OpenMutableBuffer => "OpenMutableBuffer",
Self::ClosedMutableBuffer => "ClosedMutableBuffer",
Self::ReadBuffer => "ReadBuffer",
Self::ObjectStore => "ObjectStore",
Self::ReadBufferAndObjectStore => "ReadBufferAndObjectStore",
Self::ObjectStoreOnly => "ObjectStoreOnly",
}
}
}

@@ -134,7 +138,8 @@ impl From<ChunkStorage> for management::ChunkStorage {
ChunkStorage::OpenMutableBuffer => Self::OpenMutableBuffer,
ChunkStorage::ClosedMutableBuffer => Self::ClosedMutableBuffer,
ChunkStorage::ReadBuffer => Self::ReadBuffer,
ChunkStorage::ObjectStore => Self::ObjectStore,
ChunkStorage::ReadBufferAndObjectStore => Self::ReadBufferAndObjectStore,
ChunkStorage::ObjectStoreOnly => Self::ObjectStoreOnly,
}
}
}

@@ -204,7 +209,10 @@ impl TryFrom<management::ChunkStorage> for ChunkStorage {
management::ChunkStorage::OpenMutableBuffer => Ok(Self::OpenMutableBuffer),
management::ChunkStorage::ClosedMutableBuffer => Ok(Self::ClosedMutableBuffer),
management::ChunkStorage::ReadBuffer => Ok(Self::ReadBuffer),
management::ChunkStorage::ObjectStore => Ok(Self::ObjectStore),
management::ChunkStorage::ReadBufferAndObjectStore => {
Ok(Self::ReadBufferAndObjectStore)
}
management::ChunkStorage::ObjectStoreOnly => Ok(Self::ObjectStoreOnly),
management::ChunkStorage::Unspecified => Err(FieldViolation::required("")),
}
}

@@ -220,7 +228,7 @@ mod test {
partition_key: "foo".to_string(),
id: 42,
estimated_bytes: 1234,
storage: management::ChunkStorage::ObjectStore.into(),
storage: management::ChunkStorage::ObjectStoreOnly.into(),
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -231,7 +239,7 @@ mod test {
partition_key: Arc::new("foo".to_string()),
id: 42,
estimated_bytes: 1234,
storage: ChunkStorage::ObjectStore,
storage: ChunkStorage::ObjectStoreOnly,
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -250,7 +258,7 @@ mod test {
partition_key: Arc::new("foo".to_string()),
id: 42,
estimated_bytes: 1234,
storage: ChunkStorage::ObjectStore,
storage: ChunkStorage::ObjectStoreOnly,
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -262,7 +270,7 @@ mod test {
partition_key: "foo".to_string(),
id: 42,
estimated_bytes: 1234,
storage: management::ChunkStorage::ObjectStore.into(),
storage: management::ChunkStorage::ObjectStoreOnly.into(),
time_of_first_write: None,
time_of_last_write: None,
time_closing: None,

@@ -181,7 +181,7 @@ pub struct LifecycleRules {
pub mutable_size_threshold: Option<NonZeroUsize>,

/// Once the total amount of buffered data in memory reaches this size start
/// dropping data from memory based on the drop_order
/// dropping data from memory based on the [`sort_order`](Self::sort_order)
pub buffer_size_soft: Option<NonZeroUsize>,

/// Once the amount of data in memory reaches this size start

@@ -199,6 +199,9 @@ pub struct LifecycleRules {
/// Allow dropping data that has not been persisted to object storage
pub drop_non_persisted: bool,

/// Persists chunks to object storage.
pub persist: bool,

/// Do not allow writing new data to this database
pub immutable: bool,
}

@@ -228,6 +231,7 @@ impl From<LifecycleRules> for management::LifecycleRules {
.unwrap_or_default(),
sort_order: Some(config.sort_order.into()),
drop_non_persisted: config.drop_non_persisted,
persist: config.persist,
immutable: config.immutable,
}
}

@@ -245,6 +249,7 @@ impl TryFrom<management::LifecycleRules> for LifecycleRules {
buffer_size_hard: (proto.buffer_size_hard as usize).try_into().ok(),
sort_order: proto.sort_order.optional("sort_order")?.unwrap_or_default(),
drop_non_persisted: proto.drop_non_persisted,
persist: proto.persist,
immutable: proto.immutable,
})
}

@@ -743,6 +748,7 @@ impl TryFrom<management::partition_template::Part> for TemplatePart {

/// ShardId maps to a nodegroup that holds the shard.
pub type ShardId = u16;
pub const NO_SHARD_CONFIG: Option<&ShardConfig> = None;

/// Assigns a given line to a specific shard id.
pub trait Sharder {

@@ -776,6 +782,12 @@ pub struct ShardConfig {
pub ignore_errors: bool,
}

impl Sharder for ShardConfig {
fn shard(&self, _line: &ParsedLine<'_>) -> Result<ShardId, Error> {
todo!("mkm to implement as part of #916");
}
}

/// Maps a matcher with specific target group. If the line/row matches
/// it should be sent to the group.
#[derive(Debug, Eq, PartialEq, Clone, Default)]

@@ -1281,6 +1293,7 @@ mod tests {
buffer_size_hard: 232,
sort_order: None,
drop_non_persisted: true,
persist: true,
immutable: true,
};
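Since `ShardConfig`'s `shard` method above is still a `todo!` (deferred to #916), here is a minimal sketch of what an implementation of the `Sharder` trait can look like. The round-robin strategy is purely illustrative and is not the design planned in #916:

```rust
use std::sync::atomic::{AtomicU16, Ordering};

/// Illustrative only: spreads lines across shards in round-robin order.
struct RoundRobinSharder {
    num_shards: u16,
    next: AtomicU16,
}

impl Sharder for RoundRobinSharder {
    fn shard(&self, _line: &ParsedLine<'_>) -> Result<ShardId, Error> {
        // Each call picks the next shard id, wrapping around at `num_shards`.
        let n = self.next.fetch_add(1, Ordering::Relaxed);
        Ok(n % self.num_shards)
    }
}
```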

@@ -27,6 +27,13 @@ pub enum Job {
partition_key: String,
chunk_id: u32,
},

/// Write a chunk from read buffer to object store
WriteChunk {
db_name: String,
partition_key: String,
chunk_id: u32,
},
}

impl From<Job> for management::operation_metadata::Job {

@@ -49,6 +56,15 @@ impl From<Job> for management::operation_metadata::Job {
partition_key,
chunk_id,
}),
Job::WriteChunk {
db_name,
partition_key,
chunk_id,
} => Self::WriteChunk(management::WriteChunk {
db_name,
partition_key,
chunk_id,
}),
}
}
}

@@ -74,6 +90,15 @@ impl From<management::operation_metadata::Job> for Job {
partition_key,
chunk_id,
},
Job::WriteChunk(management::WriteChunk {
db_name,
partition_key,
chunk_id,
}) => Self::WriteChunk {
db_name,
partition_key,
chunk_id,
},
}
}
}
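A short sketch of the round trip the new `WriteChunk` variant supports, using the two `From` conversions shown above; the database and partition names are placeholders:

```rust
// Sketch only: relies on the `From` impls in the hunks above being in scope.
let job = Job::WriteChunk {
    db_name: "placeholder_db".to_string(),
    partition_key: "placeholder_partition".to_string(),
    chunk_id: 7,
};

// Domain type -> protobuf job metadata for long-running operations...
let proto: management::operation_metadata::Job = job.into();

// ...and protobuf -> domain type again.
let job_again: Job = proto.into();
```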

@@ -12,6 +12,7 @@ pub struct TimestampRange {

impl TimestampRange {
pub fn new(start: i64, end: i64) -> Self {
debug_assert!(end > start);
Self { start, end }
}

@@ -26,6 +27,12 @@ impl TimestampRange {
pub fn contains_opt(&self, v: Option<i64>) -> bool {
Some(true) == v.map(|ts| self.contains(ts))
}

#[inline]
/// Returns if this range is disjoint w.r.t the provided range
pub fn disjoint(&self, other: &Self) -> bool {
self.end <= other.start || self.start >= other.end
}
}

#[cfg(test)]

@@ -55,4 +62,18 @@ mod tests {
assert!(!range.contains_opt(None));
}

#[test]
fn test_disjoint() {
let r1 = TimestampRange::new(100, 200);
let r2 = TimestampRange::new(200, 300);
let r3 = TimestampRange::new(150, 250);

assert!(r1.disjoint(&r2));
assert!(r2.disjoint(&r1));
assert!(!r1.disjoint(&r3));
assert!(!r3.disjoint(&r1));
assert!(!r2.disjoint(&r3));
assert!(!r3.disjoint(&r2));
}
}
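The new `disjoint` test relies on `TimestampRange` being half-open; a small illustration of why two ranges that meet at a boundary do not overlap:

```rust
// `end` is exclusive, so the shared boundary 200 belongs to r2 only.
let r1 = TimestampRange::new(100, 200);
let r2 = TimestampRange::new(200, 300);

assert!(!r1.contains(200)); // excluded from [100, 200)
assert!(r2.contains(200));  // included in [200, 300)
assert!(r1.disjoint(&r2));  // hence the two ranges do not overlap
```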

@@ -6,7 +6,7 @@ As discussed on https://github.com/influxdata/influxdb_iox/pull/221 and https://
1. Use only async I/O via `tokio` for socket communication. It is ok to use either blocking (e.g. `std::fs::File`) or async APIs (e.g. `tokio::fs::File`) for local File I/O.

2. All CPU bound tasks should be scheduled on the separate application level `thread_pool`, not with `tokio::task::spawn` nor `tokio::task::spawn_blocking` nor a new threadpool.
2. All CPU bound tasks should be scheduled on the separate application level `thread_pool` (which can be another tokio executor but should be separate from the executor that handles I/O).

We will work, over time, to migrate the rest of the codebase to use these patterns.

@@ -41,11 +41,11 @@ It is ok to use either blocking (e.g. `std::fs::File`) or async APIs for local
This can not always be done (e.g. with a library such as the parquet writer, which is not `async`). In such cases, `tokio::task::spawn_blocking` should be used to perform the file I/O.

### All CPU heavy work should be done on the single app level worker pool, separate from the tokio runtime
### All CPU heavy work should be done on the single app level worker pool, separate from the tokio runtime handling IO

**What**: All CPU heavy work should be done on the single app level worker pool. We provide a `thread_pool` interface that interacts nicely with async tasks (e.g. that allows an async task to `await` for a CPU heavy task to complete).
**What**: All CPU heavy work should be done on the app level worker pool. We provide a `thread_pool` interface that interacts nicely with async tasks (e.g. that allows an async task to `await` for a CPU heavy task to complete).

**Rationale**: A single app level worker pool gives us a single place to control work priority, eventually, so that tasks such as compaction of large data files can have lower precedence than incoming queries. By using a different pool than the tokio runtime, with a limited number of threads, we avoid over-saturating the CPU with OS threads and thereby starving the limited number of tokio I/O threads. A separate, single app level pool also limits the number of underlying OS CPU threads which are spawned, even under heavy load, keeping thread context switching overhead low.
**Rationale**: A single app level worker pool gives us a single place to control work priority, eventually, so that tasks such as compaction of large data files can have lower precedence than incoming queries. By using a different pool than the main tokio runtime, with a limited number of threads, we avoid over-saturating the CPU with OS threads and thereby starving the limited number of tokio I/O threads. A separate, single app level pool also limits the number of underlying OS CPU threads which are spawned, even under heavy load, keeping thread context switching overhead low.

There will, of course, always be a judgment call to be made of where "CPU bound work" starts and "work acceptable for I/O processing" ends. A reasonable rule of thumb is that if a job will *always* be completed in less than 100ms then it is probably fine for an I/O thread. This number may be revised as we tune the system.
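A minimal sketch of the hand-off this guideline describes: CPU-heavy work runs on a dedicated pool (modelled here as a second tokio runtime handle) while the calling async task simply awaits the result. This is illustrative only and is not IOx's actual `thread_pool` interface:

```rust
/// Illustrative only: run a CPU-heavy closure on a dedicated worker pool
/// and await its result from an async task on the I/O runtime.
async fn run_on_cpu_pool<F, T>(cpu_pool: &tokio::runtime::Handle, work: F) -> T
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    cpu_pool
        // The closure executes on the CPU pool's threads, so the I/O
        // runtime's worker threads are never blocked by it.
        .spawn(async move { work() })
        .await
        .expect("CPU-heavy task panicked")
}
```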

@@ -5,3 +5,6 @@ When updating the version of the [flatbuffers](https://crates.io/crates/flatbuff
To update the generated code, edit `generated_types/regenerate-flatbuffers.sh` and set the `FB_COMMIT` variable at the top of the file to the commit SHA of the same commit in the [flatbuffers repository](https://github.com/google/flatbuffers) where the `flatbuffers` Rust crate version was updated. This ensures we'll be [using the same version of `flatc` that the crate was tested with](https://github.com/google/flatbuffers/issues/6199#issuecomment-714562121).

Then run the `generated_types/regenerate-flatbuffers.sh` script and check in any changes. Check that the whole project builds.

`generated_types/regenerate-flatbuffers.sh` will build `flatc` from source if it cannot be found.
In order to do that, your system will require `bazel`; you can likely install this with your favourite package manager.

@@ -9,7 +9,7 @@ bytes = { version = "1.0", features = ["serde"] }
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
# version of the flatbuffers crate
flatbuffers = "0.8"
futures = "0.3.1"
futures = "0.3"
prost = "0.7"
prost-types = "0.7"
tonic = "0.4"

@@ -17,8 +17,11 @@ enum ChunkStorage {
// The chunk is in the Read Buffer (where it can not be mutated)
CHUNK_STORAGE_READ_BUFFER = 3;

// The chunk is in the Read Buffer and Object Store
CHUNK_STORAGE_READ_BUFFER_AND_OBJECT_STORE = 4;

// The chunk is stored in Object Storage (where it can not be mutated)
CHUNK_STORAGE_OBJECT_STORE = 4;
CHUNK_STORAGE_OBJECT_STORE_ONLY = 5;
}

// `Chunk` represents part of a partition of data in a database.

@@ -151,6 +151,9 @@ message LifecycleRules {
// Allow dropping data that has not been persisted to object storage
bool drop_non_persisted = 7;

// Persists chunks to object storage.
bool persist = 9;

// Do not allow writing new data to this database
bool immutable = 8;
}

@@ -19,6 +19,7 @@ message OperationMetadata {
Dummy dummy = 5;
PersistSegment persist_segment = 6;
CloseChunk close_chunk = 7;
WriteChunk write_chunk = 8;
}
}

@@ -45,3 +46,15 @@ message CloseChunk {
// chunk_id
uint32 chunk_id = 3;
}

// Write a chunk from read buffer to object store
message WriteChunk {
// name of the database
string db_name = 1;

// partition key
string partition_key = 2;

// chunk_id
uint32 chunk_id = 3;
}

@@ -4,7 +4,12 @@ package influxdata.iox.write.v1;
service WriteService {
// write data into a specific Database
rpc Write(WriteRequest) returns (WriteResponse);
rpc Write(WriteRequest) returns (WriteResponse) {
option deprecated = true;
};

// write an entry into a Database
rpc WriteEntry(WriteEntryRequest) returns (WriteEntryResponse);
}

message WriteRequest {

@@ -21,3 +26,17 @@ message WriteResponse {
// how many lines were parsed and written into the database
uint64 lines_written = 1;
}

message WriteEntryRequest {
// name of database into which to write
string db_name = 1;

// entry, in serialized flatbuffers [Entry] format
//
// [Entry](https://github.com/influxdata/influxdb_iox/blob/main/generated_types/protos/influxdata/iox/write/v1/entry.fbs)
bytes entry = 2;
}

message WriteEntryResponse {
}
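A hypothetical client call against the new `WriteEntry` RPC, assuming the usual tonic code generation layout; the module path, address, and database name are illustrative only:

```rust
// Assumed paths from tonic codegen for package influxdata.iox.write.v1;
// adjust to wherever the generated types are actually re-exported.
use generated_types::influxdata::iox::write::v1::{
    write_service_client::WriteServiceClient, WriteEntryRequest,
};

async fn send_entry(entry_bytes: Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
    let mut client = WriteServiceClient::connect("http://127.0.0.1:8082").await?;

    // `entry` carries the serialized flatbuffers Entry described above.
    client
        .write_entry(WriteEntryRequest {
            db_name: "example_db".to_string(),
            entry: entry_bytes,
        })
        .await?;

    Ok(())
}
```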

@@ -1,11 +1,29 @@
#!/bin/bash -e

# The commit where the Rust `flatbuffers` crate version was changed to the version in `Cargo.lock`
# Update this, rerun this script, and check in the changes in the generated code when the
# `flatbuffers` crate version is updated.
# Instructions
#
# If you have changed some `*.fbs` files:
#
# - Run this script to regenerate the corresponding Rust code.
# - Run `cargo test` to make sure everything works as you would expect.
# - Check in the changes to the generated code along with your changes to the `*.fbs` files.
# - You should not need to edit this script.
#
# If you are updating the version of the `flatbuffers` crate in `Cargo.lock`:
#
# - The `flatbuffers` crate gets developed in sync with the `flatc` compiler in the same repo,
#   so when updating the `flatbuffers` crate we also need to update the `flatc` compiler we're
#   using.
# - Go to https://github.com/google/flatbuffers/blame/master/rust/flatbuffers/Cargo.toml and find
#   the commit SHA where the `version` metadata was updated to the version of the `flatbuffers`
#   crate we now want to have in our `Cargo.lock`.
# - Put that commit SHA in this variable:
FB_COMMIT="86401e078d0746d2381735415f8c2dfe849f3f52"
# - Run this script to regenerate the corresponding Rust code.
# - Run `cargo test` to make sure everything works as you would expect.
# - Check in the changes to the generated code along with your changes to the `Cargo.lock` file and
#   this script.

# Change to the generated_types crate directory, where this script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
pushd $DIR

@@ -6,7 +6,7 @@ edition = "2018"
[dependencies] # In alphabetical order
bytes = { version = "1.0", default-features = false }
futures = { version = "0.3.5", default-features = false }
futures = { version = "0.3", default-features = false }
reqwest = { version = "0.11", features = ["stream", "json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.44"

@@ -1,8 +1,7 @@
use chrono::{DateTime, Utc};
use criterion::{criterion_group, criterion_main, Criterion};
use data_types::database_rules::{Error as DataError, Partitioner, Sharder};
use influxdb_line_protocol::ParsedLine;
use internal_types::entry::{lines_to_sharded_entries, SequencedEntry};
use data_types::database_rules::ShardConfig;
use internal_types::entry::test_helpers::partitioner;
use internal_types::entry::{lines_to_sharded_entries, ClockValue, SequencedEntry};

static LINES: &str = include_str!("../../tests/fixtures/lineproto/prometheus.lp");

@@ -12,7 +11,8 @@ fn sequenced_entry(c: &mut Criterion) {
let lines = influxdb_line_protocol::parse_lines(LINES)
.collect::<Result<Vec<_>, _>>()
.unwrap();
let sharded_entries = lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
let shard_config: Option<&ShardConfig> = None;
let sharded_entries = lines_to_sharded_entries(&lines, shard_config, &partitioner(1)).unwrap();
let entry = &sharded_entries.first().unwrap().entry;
let data = entry.data();
assert_eq!(

@@ -28,10 +28,13 @@ fn sequenced_entry(c: &mut Criterion) {
554
);

let clock_value = ClockValue::new(23);

group.bench_function("new_from_entry_bytes", |b| {
b.iter(|| {
let sequenced_entry = SequencedEntry::new_from_entry_bytes(23, 2, data).unwrap();
assert_eq!(sequenced_entry.clock_value(), 23);
let sequenced_entry =
SequencedEntry::new_from_entry_bytes(clock_value, 2, data).unwrap();
assert_eq!(sequenced_entry.clock_value(), clock_value);
assert_eq!(sequenced_entry.writer_id(), 2);
})
});

@@ -42,50 +45,3 @@ fn sequenced_entry(c: &mut Criterion) {
criterion_group!(benches, sequenced_entry);

criterion_main!(benches);

fn sharder(count: u16) -> TestSharder {
TestSharder {
count,
n: std::cell::RefCell::new(0),
}
}

// For each line passed to shard returns a shard id from [0, count) in order
struct TestSharder {
count: u16,
n: std::cell::RefCell<u16>,
}

impl Sharder for TestSharder {
fn shard(&self, _line: &ParsedLine<'_>) -> Result<u16, DataError> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(n % self.count)
}
}

fn partitioner(count: u8) -> TestPartitioner {
TestPartitioner {
count,
n: std::cell::RefCell::new(0),
}
}

// For each line passed to partition_key returns a key with a number from [0,
// count)
struct TestPartitioner {
count: u8,
n: std::cell::RefCell<u8>,
}

impl Partitioner for TestPartitioner {
fn partition_key(
&self,
_line: &ParsedLine<'_>,
_default_time: &DateTime<Utc>,
) -> data_types::database_rules::Result<String> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(format!("key_{}", n % self.count))
}
}

@@ -2,7 +2,7 @@
//! from line protocol and the `DatabaseRules` configuration.

use crate::schema::TIME_COLUMN_NAME;
use data_types::database_rules::{Error as DataError, Partitioner, ShardId, Sharder};
use data_types::database_rules::{Error as DataError, Partitioner, ShardId, Sharder, WriterId};
use generated_types::entry as entry_fb;
use influxdb_line_protocol::{FieldValue, ParsedLine};

@@ -56,14 +56,17 @@ type ColumnResult<T, E = ColumnError> = std::result::Result<T, E>;
/// underlying flatbuffers bytes generated.
pub fn lines_to_sharded_entries(
lines: &[ParsedLine<'_>],
sharder: &impl Sharder,
sharder: Option<&impl Sharder>,
partitioner: &impl Partitioner,
) -> Result<Vec<ShardedEntry>> {
let default_time = Utc::now();
let mut sharded_lines = BTreeMap::new();

for line in lines {
let shard_id = sharder.shard(line).context(GeneratingShardId)?;
let shard_id = match &sharder {
Some(s) => Some(s.shard(line).context(GeneratingShardId)?),
None => None,
};
let partition_key = partitioner
.partition_key(line, &default_time)
.context(GeneratingPartitionKey)?;

@@ -90,7 +93,7 @@ pub fn lines_to_sharded_entries(
}

fn build_sharded_entry(
shard_id: ShardId,
shard_id: Option<ShardId>,
partitions: BTreeMap<String, BTreeMap<&str, Vec<&ParsedLine<'_>>>>,
default_time: &DateTime<Utc>,
) -> Result<ShardedEntry> {
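With the sharder now optional, callers that do not shard can pass `NO_SHARD_CONFIG`. A minimal sketch mirroring the `no_shard_config` test added later in this commit (`partitioner(1)` is the test helper introduced below):

```rust
use data_types::database_rules::NO_SHARD_CONFIG;
use influxdb_line_protocol::parse_lines;

let lp = "cpu,host=a,region=west user=23.1 123";
let lines: Vec<_> = parse_lines(lp).map(|l| l.unwrap()).collect();

// Without a shard config, all lines collapse into one entry whose
// shard_id is None.
let sharded_entries =
    lines_to_sharded_entries(&lines, NO_SHARD_CONFIG, &partitioner(1)).unwrap();
assert_eq!(sharded_entries.len(), 1);
assert_eq!(sharded_entries[0].shard_id, None);
```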

@@ -277,10 +280,12 @@ fn build_table_write_batch<'a>(
))
}

/// Holds a shard id to the associated entry
/// Holds a shard id to the associated entry. If there is no ShardId, then
/// everything goes to the same place. This means a single entry will be
/// generated from a batch of line protocol.
#[derive(Debug)]
pub struct ShardedEntry {
pub shard_id: ShardId,
pub shard_id: Option<ShardId>,
pub entry: Entry,
}

@@ -337,8 +342,10 @@ pub struct PartitionWrite<'a> {
}

impl<'a> PartitionWrite<'a> {
pub fn key(&self) -> Option<&str> {
self.fb.key()
pub fn key(&self) -> &str {
self.fb
.key()
.expect("key must be present in the flatbuffer PartitionWrite")
}

pub fn table_batches(&self) -> Vec<TableBatch<'_>> {

@@ -360,8 +367,10 @@ pub struct TableBatch<'a> {
}

impl<'a> TableBatch<'a> {
pub fn name(&self) -> Option<&str> {
self.fb.name()
pub fn name(&self) -> &str {
self.fb
.name()
.expect("name must be present in flatbuffers TableWriteBatch")
}

pub fn columns(&self) -> Vec<Column<'_>> {

@@ -420,18 +429,32 @@ impl<'a> TableBatch<'a> {
#[derive(Debug)]
pub struct Column<'a> {
fb: entry_fb::Column<'a>,
row_count: usize,
pub row_count: usize,
}

impl<'a> Column<'a> {
pub fn name(&self) -> Option<&str> {
self.fb.name()
pub fn name(&self) -> &str {
self.fb
.name()
.expect("name must be present in flatbuffers Column")
}

pub fn logical_type(&self) -> entry_fb::LogicalColumnType {
self.fb.logical_column_type()
}

pub fn is_tag(&self) -> bool {
self.fb.logical_column_type() == entry_fb::LogicalColumnType::Tag
}

pub fn is_field(&self) -> bool {
self.fb.logical_column_type() == entry_fb::LogicalColumnType::Field
}

pub fn is_time(&self) -> bool {
self.fb.logical_column_type() == entry_fb::LogicalColumnType::Time
}

pub fn values(&self) -> TypedValuesIterator<'a> {
match self.fb.values_type() {
entry_fb::ColumnValues::BoolValues => TypedValuesIterator::Bool(BoolIterator {

@@ -554,12 +577,22 @@ impl<'a> TypedValuesIterator<'a> {
_ => None,
}
}

pub fn type_description(&self) -> &str {
match self {
Self::Bool(_) => "bool",
Self::I64(_) => "i64",
Self::F64(_) => "f64",
Self::U64(_) => "u64",
Self::String(_) => "String",
}
}
}

/// Iterator over the flatbuffers BoolValues
#[derive(Debug)]
pub struct BoolIterator<'a> {
row_count: usize,
pub row_count: usize,
position: usize,
null_mask: Option<&'a [u8]>,
values: &'a [bool],

@@ -589,7 +622,7 @@ impl<'a> Iterator for BoolIterator<'a> {
/// Iterator over the flatbuffers I64Values, F64Values, and U64Values.
#[derive(Debug)]
pub struct ValIterator<'a, T: Follow<'a> + Follow<'a, Inner = T>> {
row_count: usize,
pub row_count: usize,
position: usize,
null_mask: Option<&'a [u8]>,
values_iter: VectorIter<'a, T>,

@@ -615,7 +648,7 @@ impl<'a, T: Follow<'a> + Follow<'a, Inner = T>> Iterator for ValIterator<'a, T>
/// Iterator over the flatbuffers StringValues
#[derive(Debug)]
pub struct StringIterator<'a> {
row_count: usize,
pub row_count: usize,
position: usize,
null_mask: Option<&'a [u8]>,
values: VectorIter<'a, ForwardsUOffset<&'a str>>,

@@ -1087,6 +1120,19 @@ enum ColumnRaw<'a> {
Bool(Vec<bool>),
}

#[derive(Debug, PartialOrd, PartialEq, Copy, Clone)]
pub struct ClockValue(u64);

impl ClockValue {
pub fn get(&self) -> u64 {
self.0
}

pub fn new(v: u64) -> Self {
Self { 0: v }
}
}

#[self_referencing]
#[derive(Debug)]
pub struct SequencedEntry {

@@ -1101,7 +1147,7 @@ pub struct SequencedEntry {

impl SequencedEntry {
pub fn new_from_entry_bytes(
clock_value: u64,
clock_value: ClockValue,
writer_id: u32,
entry_bytes: &[u8],
) -> Result<Self> {

@@ -1118,7 +1164,7 @@ impl SequencedEntry {
let sequenced_entry = entry_fb::SequencedEntry::create(
&mut fbb,
&entry_fb::SequencedEntryArgs {
clock_value,
clock_value: clock_value.get(),
writer_id,
entry_bytes: Some(entry_bytes),
},

@@ -1151,11 +1197,11 @@ impl SequencedEntry {
}
}

pub fn clock_value(&self) -> u64 {
self.fb().clock_value()
pub fn clock_value(&self) -> ClockValue {
ClockValue::new(self.fb().clock_value())
}

pub fn writer_id(&self) -> u32 {
pub fn writer_id(&self) -> WriterId {
self.fb().writer_id()
}
}

@@ -1180,10 +1226,133 @@ impl TryFrom<Vec<u8>> for SequencedEntry {
}
}

pub mod test_helpers {
use super::*;
use chrono::TimeZone;
use influxdb_line_protocol::parse_lines;

// An appropriate maximum size for batches of LP to be written into IOx. Using
// test fixtures containing more than this many lines of LP will result in them
// being written as multiple writes.
const LP_BATCH_SIZE: usize = 10000;

/// Converts the line protocol to a single `Entry` with a single shard and
/// a single partition.
pub fn lp_to_entry(lp: &str) -> Entry {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

lines_to_sharded_entries(&lines, sharder(1).as_ref(), &hour_partitioner())
.unwrap()
.pop()
.unwrap()
.entry
}

/// Converts the line protocol to a collection of `Entry` with a single
/// shard and a single partition, which is useful for testing when `lp` is
/// large. Batches are sized according to LP_BATCH_SIZE.
pub fn lp_to_entries(lp: &str) -> Vec<Entry> {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

lines
.chunks(LP_BATCH_SIZE)
.map(|batch| {
lines_to_sharded_entries(batch, sharder(1).as_ref(), &hour_partitioner())
.unwrap()
.pop()
.unwrap()
.entry
})
.collect::<Vec<_>>()
}

/// Returns a test sharder that will assign shard ids from [0, count)
/// incrementing for each line.
pub fn sharder(count: u16) -> Option<TestSharder> {
Some(TestSharder {
count,
n: std::cell::RefCell::new(0),
})
}

// For each line passed to shard returns a shard id from [0, count) in order
#[derive(Debug)]
pub struct TestSharder {
count: u16,
n: std::cell::RefCell<u16>,
}

impl Sharder for TestSharder {
fn shard(&self, _line: &ParsedLine<'_>) -> Result<u16, DataError> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(n % self.count)
}
}

/// Returns a test partitioner that will partition data by the hour
pub fn hour_partitioner() -> HourPartitioner {
HourPartitioner {}
}

/// Returns a test partitioner that will assign partition keys in the form
/// key_# where # is replaced by a number `[0, count)` incrementing for
/// each line.
pub fn partitioner(count: u8) -> TestPartitioner {
TestPartitioner {
count,
n: std::cell::RefCell::new(0),
}
}

// For each line passed to partition_key returns a key with a number from
// `[0, count)`
#[derive(Debug)]
pub struct TestPartitioner {
count: u8,
n: std::cell::RefCell<u8>,
}

impl Partitioner for TestPartitioner {
fn partition_key(
&self,
_line: &ParsedLine<'_>,
_default_time: &DateTime<Utc>,
) -> data_types::database_rules::Result<String> {
let n = *self.n.borrow();
self.n.replace(n + 1);
Ok(format!("key_{}", n % self.count))
}
}

// Partitions by the hour
#[derive(Debug)]
pub struct HourPartitioner {}

impl Partitioner for HourPartitioner {
fn partition_key(
&self,
line: &ParsedLine<'_>,
default_time: &DateTime<Utc>,
) -> data_types::database_rules::Result<String> {
const HOUR_FORMAT: &str = "%Y-%m-%dT%H";

let key = match line.timestamp {
Some(t) => Utc.timestamp_nanos(t).format(HOUR_FORMAT),
None => default_time.format(HOUR_FORMAT),
}
.to_string();

Ok(key)
}
}
}

#[cfg(test)]
mod tests {
use super::test_helpers::*;
use super::*;

use data_types::database_rules::NO_SHARD_CONFIG;
use influxdb_line_protocol::parse_lines;

#[test]

@@ -1197,11 +1366,28 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(2), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(2).as_ref(), &partitioner(1)).unwrap();

assert_eq!(sharded_entries.len(), 2);
assert_eq!(sharded_entries[0].shard_id, 0);
assert_eq!(sharded_entries[1].shard_id, 1);
assert_eq!(sharded_entries[0].shard_id, Some(0));
assert_eq!(sharded_entries[1].shard_id, Some(1));
}

#[test]
fn no_shard_config() {
let lp = vec![
"cpu,host=a,region=west user=23.1,system=66.1 123",
"mem,host=a,region=west used=23432 123",
"foo bar=true 21",
]
.join("\n");
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, NO_SHARD_CONFIG, &partitioner(1)).unwrap();

assert_eq!(sharded_entries.len(), 1);
assert_eq!(sharded_entries[0].shard_id, None);
}

#[test]

@@ -1215,12 +1401,12 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(2)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(2)).unwrap();

let partition_writes = sharded_entries[0].entry.partition_writes().unwrap();
assert_eq!(partition_writes.len(), 2);
assert_eq!(partition_writes[0].key().unwrap(), "key_0");
assert_eq!(partition_writes[1].key().unwrap(), "key_1");
assert_eq!(partition_writes[0].key(), "key_0");
assert_eq!(partition_writes[1].key(), "key_1");
}

#[test]

@@ -1236,15 +1422,15 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries[0].entry.partition_writes().unwrap();
let table_batches = partition_writes[0].table_batches();

assert_eq!(table_batches.len(), 3);
assert_eq!(table_batches[0].name().unwrap(), "cpu");
assert_eq!(table_batches[1].name().unwrap(), "disk");
assert_eq!(table_batches[2].name().unwrap(), "mem");
assert_eq!(table_batches[0].name(), "cpu");
assert_eq!(table_batches[1].name(), "disk");
assert_eq!(table_batches[2].name(), "mem");
}

#[test]

@@ -1253,7 +1439,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries[0].entry.partition_writes().unwrap();
let table_batches = partition_writes[0].table_batches();

@@ -1263,22 +1449,22 @@ mod tests {
assert_eq!(columns.len(), 5);

assert_eq!(columns[0].name().unwrap(), "host");
assert_eq!(columns[0].name(), "host");
assert_eq!(columns[0].logical_type(), entry_fb::LogicalColumnType::Tag);

assert_eq!(columns[1].name().unwrap(), "region");
assert_eq!(columns[1].name(), "region");
assert_eq!(columns[1].logical_type(), entry_fb::LogicalColumnType::Tag);

assert_eq!(columns[2].name().unwrap(), "time");
assert_eq!(columns[2].name(), "time");
assert_eq!(columns[2].logical_type(), entry_fb::LogicalColumnType::Time);

assert_eq!(columns[3].name().unwrap(), "val");
assert_eq!(columns[3].name(), "val");
assert_eq!(
columns[3].logical_type(),
entry_fb::LogicalColumnType::Field
);

assert_eq!(columns[4].name().unwrap(), "val2");
assert_eq!(columns[4].name(), "val2");
assert_eq!(
columns[4].logical_type(),
entry_fb::LogicalColumnType::Field

@@ -1295,7 +1481,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries
.first()

@@ -1312,17 +1498,17 @@ mod tests {
assert_eq!(columns.len(), 7);

let col = columns.get(0).unwrap();
assert_eq!(col.name().unwrap(), "bval");
assert_eq!(col.name(), "bval");
let values = col.values().bool_values().unwrap();
assert_eq!(&values, &[Some(true), Some(false)]);

let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "fval");
assert_eq!(col.name(), "fval");
let values = col.values().f64_values().unwrap();
assert_eq!(&values, &[Some(1.2), Some(2.2)]);

let col = columns.get(2).unwrap();
assert_eq!(col.name().unwrap(), "host");
assert_eq!(col.name(), "host");
let values = match col.values() {
TypedValuesIterator::String(v) => v,
_ => panic!("wrong type"),

@@ -1331,12 +1517,12 @@ mod tests {
assert_eq!(&values, &[Some("a"), Some("b")]);

let col = columns.get(3).unwrap();
assert_eq!(col.name().unwrap(), "ival");
assert_eq!(col.name(), "ival");
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(23), Some(22)]);

let col = columns.get(4).unwrap();
assert_eq!(col.name().unwrap(), "sval");
assert_eq!(col.name(), "sval");
let values = match col.values() {
TypedValuesIterator::String(v) => v,
_ => panic!("wrong type"),

@@ -1345,12 +1531,12 @@ mod tests {
assert_eq!(&values, &[Some("hi"), Some("world")]);

let col = columns.get(5).unwrap();
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
assert_eq!(col.name(), TIME_COLUMN_NAME);
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(1), Some(2)]);

let col = columns.get(6).unwrap();
assert_eq!(col.name().unwrap(), "uval");
assert_eq!(col.name(), "uval");
let values = col.values().u64_values().unwrap();
assert_eq!(&values, &[Some(7), Some(1)]);
}

@@ -1366,7 +1552,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries
.first()

@@ -1383,13 +1569,13 @@ mod tests {
assert_eq!(columns.len(), 7);

let col = columns.get(0).unwrap();
assert_eq!(col.name().unwrap(), "bool");
assert_eq!(col.name(), "bool");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = col.values().bool_values().unwrap();
assert_eq!(&values, &[None, None, Some(true)]);

let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "host");
assert_eq!(col.name(), "host");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
let values = match col.values() {
TypedValuesIterator::String(v) => v,

@@ -1399,7 +1585,7 @@ mod tests {
assert_eq!(&values, &[Some("a"), Some("a"), None]);

let col = columns.get(2).unwrap();
assert_eq!(col.name().unwrap(), "region");
assert_eq!(col.name(), "region");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
let values = match col.values() {
TypedValuesIterator::String(v) => v,

@@ -1409,7 +1595,7 @@ mod tests {
assert_eq!(&values, &[None, Some("west"), None]);

let col = columns.get(3).unwrap();
assert_eq!(col.name().unwrap(), "string");
assert_eq!(col.name(), "string");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = match col.values() {
TypedValuesIterator::String(v) => v,

@@ -1419,19 +1605,19 @@ mod tests {
assert_eq!(&values, &[None, None, Some("hello")]);

let col = columns.get(4).unwrap();
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
assert_eq!(col.name(), TIME_COLUMN_NAME);
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Time);
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(983), Some(2343), Some(222)]);

let col = columns.get(5).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(23), None, Some(21)]);

let col = columns.get(6).unwrap();
assert_eq!(col.name().unwrap(), "val2");
assert_eq!(col.name(), "val2");
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
let values = col.values().f64_values().unwrap();
assert_eq!(&values, &[None, Some(23.2), None]);

@@ -1491,7 +1677,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
let partition_writes = sharded_entries
.first()
.unwrap()

@@ -1504,7 +1690,7 @@ mod tests {
assert_eq!(batch.row_count(), 1);
let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
let values = col.values().i64_values().unwrap();
assert_eq!(&values, &[Some(1)]);

@@ -1522,7 +1708,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
let partition_writes = sharded_entries
.first()
.unwrap()

@@ -1535,7 +1721,7 @@ mod tests {
assert_eq!(batch.row_count(), 8);
let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
let values = col.values().i64_values().unwrap();
assert_eq!(
&values,

@@ -1566,7 +1752,7 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
let partition_writes = sharded_entries
.first()
.unwrap()

@@ -1579,7 +1765,7 @@ mod tests {
assert_eq!(batch.row_count(), 9);
let col = columns.get(1).unwrap();
assert_eq!(col.name().unwrap(), "val");
assert_eq!(col.name(), "val");
let values = col.values().i64_values().unwrap();
assert_eq!(
&values,

@@ -1605,7 +1791,7 @@ mod tests {
let t = Utc::now().timestamp_nanos();

let sharded_entries =
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();

let partition_writes = sharded_entries
.first()

@@ -1618,7 +1804,7 @@ mod tests {
let columns = batch.columns();

let col = columns.get(0).unwrap();
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
assert_eq!(col.name(), TIME_COLUMN_NAME);
let values = col.values().i64_values().unwrap();
assert!(values[0].unwrap() > t);
assert_eq!(values[1], Some(123));

@@ -1629,7 +1815,8 @@ mod tests {
let lp = vec!["a val=1i 1", "a val=2.1 123"].join("\n");
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries = lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1));
let sharded_entries =
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1));

assert!(sharded_entries.is_err());
}

@@ -1639,7 +1826,8 @@ mod tests {
let lp = vec!["a,host=a val=1i 1", "a host=\"b\" 123"].join("\n");
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();

let sharded_entries = lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1));
let sharded_entries =
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1));

assert!(sharded_entries.is_err());
}

@@ -1655,11 +1843,13 @@ mod tests {
let lines: Vec<_> = parse_lines(&lp).map(|l| l.unwrap()).collect();
|
||||
|
||||
let sharded_entries =
|
||||
lines_to_sharded_entries(&lines, &sharder(1), &partitioner(1)).unwrap();
|
||||
lines_to_sharded_entries(&lines, sharder(1).as_ref(), &partitioner(1)).unwrap();
|
||||
|
||||
let entry_bytes = sharded_entries.first().unwrap().entry.data();
|
||||
let sequenced_entry = SequencedEntry::new_from_entry_bytes(23, 2, entry_bytes).unwrap();
|
||||
assert_eq!(sequenced_entry.clock_value(), 23);
|
||||
let clock_value = ClockValue::new(23);
|
||||
let sequenced_entry =
|
||||
SequencedEntry::new_from_entry_bytes(clock_value, 2, entry_bytes).unwrap();
|
||||
assert_eq!(sequenced_entry.clock_value(), clock_value);
|
||||
assert_eq!(sequenced_entry.writer_id(), 2);
|
||||
|
||||
let partition_writes = sequenced_entry.partition_writes().unwrap();
|
||||
|
@ -1672,13 +1862,13 @@ mod tests {
|
|||
assert_eq!(columns.len(), 7);
|
||||
|
||||
let col = columns.get(0).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "bool");
|
||||
assert_eq!(col.name(), "bool");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = col.values().bool_values().unwrap();
|
||||
assert_eq!(&values, &[None, None, Some(true)]);
|
||||
|
||||
let col = columns.get(1).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "host");
|
||||
assert_eq!(col.name(), "host");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
|
||||
let values = match col.values() {
|
||||
TypedValuesIterator::String(v) => v,
|
||||
|
@ -1688,7 +1878,7 @@ mod tests {
|
|||
assert_eq!(&values, &[Some("a"), Some("a"), None]);
|
||||
|
||||
let col = columns.get(2).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "region");
|
||||
assert_eq!(col.name(), "region");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Tag);
|
||||
let values = match col.values() {
|
||||
TypedValuesIterator::String(v) => v,
|
||||
|
@ -1698,7 +1888,7 @@ mod tests {
|
|||
assert_eq!(&values, &[None, Some("west"), None]);
|
||||
|
||||
let col = columns.get(3).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "string");
|
||||
assert_eq!(col.name(), "string");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = match col.values() {
|
||||
TypedValuesIterator::String(v) => v,
|
||||
|
@ -1708,68 +1898,21 @@ mod tests {
|
|||
assert_eq!(&values, &[None, None, Some("hello")]);
|
||||
|
||||
let col = columns.get(4).unwrap();
|
||||
assert_eq!(col.name().unwrap(), TIME_COLUMN_NAME);
|
||||
assert_eq!(col.name(), TIME_COLUMN_NAME);
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Time);
|
||||
let values = col.values().i64_values().unwrap();
|
||||
assert_eq!(&values, &[Some(983), Some(2343), Some(222)]);
|
||||
|
||||
let col = columns.get(5).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "val");
|
||||
assert_eq!(col.name(), "val");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = col.values().i64_values().unwrap();
|
||||
assert_eq!(&values, &[Some(23), None, Some(21)]);
|
||||
|
||||
let col = columns.get(6).unwrap();
|
||||
assert_eq!(col.name().unwrap(), "val2");
|
||||
assert_eq!(col.name(), "val2");
|
||||
assert_eq!(col.logical_type(), entry_fb::LogicalColumnType::Field);
|
||||
let values = col.values().f64_values().unwrap();
|
||||
assert_eq!(&values, &[None, Some(23.2), None]);
|
||||
    }

    fn sharder(count: u16) -> TestSharder {
        TestSharder {
            count,
            n: std::cell::RefCell::new(0),
        }
    }

    // For each line passed to shard returns a shard id from [0, count) in order
    struct TestSharder {
        count: u16,
        n: std::cell::RefCell<u16>,
    }

    impl Sharder for TestSharder {
        fn shard(&self, _line: &ParsedLine<'_>) -> Result<u16, DataError> {
            let n = *self.n.borrow();
            self.n.replace(n + 1);
            Ok(n % self.count)
        }
    }

    fn partitioner(count: u8) -> TestPartitioner {
        TestPartitioner {
            count,
            n: std::cell::RefCell::new(0),
        }
    }

    // For each line passed to partition_key returns a key with a number from [0,
    // count)
    struct TestPartitioner {
        count: u8,
        n: std::cell::RefCell<u8>,
    }

    impl Partitioner for TestPartitioner {
        fn partition_key(
            &self,
            _line: &ParsedLine<'_>,
            _default_time: &DateTime<Utc>,
        ) -> data_types::database_rules::Result<String> {
            let n = *self.n.borrow();
            self.n.replace(n + 1);
            Ok(format!("key_{}", n % self.count))
        }
    }
}

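To make the round-robin contract of these helpers concrete, here is a hedged sketch that is not part of the diff; it reuses only items defined in the tests above (TestSharder, the Sharder trait, parse_lines) together with a made-up line protocol string.

// Hedged sketch: with count = 2, shard ids cycle 0, 1, 0, ... per line.
let lp = "cpu val=1i 1\ncpu val=2i 2\ncpu val=3i 3"; // hypothetical input
let lines: Vec<_> = parse_lines(lp).map(|l| l.unwrap()).collect();
let test_sharder = TestSharder {
    count: 2,
    n: std::cell::RefCell::new(0),
};
let ids: Vec<u16> = lines.iter().map(|l| test_sharder.shard(l).unwrap()).collect();
assert_eq!(ids, vec![0u16, 1, 0]); // ids stay within [0, count)
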
@@ -94,11 +94,8 @@ pub enum Error {
        source: arrow_deps::arrow::error::ArrowError,
    },

    #[snafu(display("Schema Selection error while selecting '{}': {}", column_name, source))]
    SelectingColumns {
        column_name: String,
        source: arrow_deps::arrow::error::ArrowError,
    },
    #[snafu(display("Column not found '{}'", column_name))]
    ColumnNotFound { column_name: String },
}

fn nullable_to_str(nullability: bool) -> &'static str {

@@ -470,6 +467,44 @@ impl Schema {
            }
        }
    }

    /// Returns the field indexes for a given selection
    ///
    /// Returns an error if a corresponding column isn't found
    pub fn select(&self, columns: &[&str]) -> Result<Vec<usize>> {
        columns
            .iter()
            .map(|column_name| {
                self.find_index_of(column_name)
                    .ok_or_else(|| Error::ColumnNotFound {
                        column_name: column_name.to_string(),
                    })
            })
            .collect()
    }

    /// Returns the schema for a given set of column projects
    pub fn project(&self, projection: &[usize]) -> Self {
        let mut metadata = HashMap::with_capacity(projection.len() + 1);
        let mut fields = Vec::with_capacity(projection.len());
        let current_metadata = self.inner.metadata();
        for idx in projection {
            let (_, field) = self.field(*idx);
            fields.push(field.clone());

            if let Some(value) = current_metadata.get(field.name()) {
                metadata.insert(field.name().clone(), value.clone());
            }
        }

        if let Some(measurement) = current_metadata.get(MEASUREMENT_METADATA_KEY).cloned() {
            metadata.insert(MEASUREMENT_METADATA_KEY.to_string(), measurement);
        }

        Self {
            inner: Arc::new(ArrowSchema::new_with_metadata(fields, metadata)),
        }
    }
}

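As a quick orientation for reviewers, a hedged usage sketch of the new select/project pair follows; it is not part of the change, and the builder calls simply mirror the test_select test further below (SchemaBuilder, the field type token, and TIME_COLUMN_NAME are assumed to be in scope as they are there).

// Hedged sketch: resolve two columns by name, then narrow the schema to them.
let schema = SchemaBuilder::new()
    .influx_field("the_field", String)
    .tag("the_tag")
    .timestamp()
    .measurement("the_measurement")
    .build()
    .unwrap();

// `select` maps names to field indexes and fails with ColumnNotFound for
// unknown names; `project` keeps only those fields plus the measurement
// metadata.
let projection = schema.select(&["the_tag", TIME_COLUMN_NAME]).unwrap();
let projected = schema.project(&projection);
assert_eq!(projected.len(), 2);
assert_eq!(schema.measurement(), projected.measurement());
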
/// Valid types for InfluxDB data model, as defined in [the documentation]
|
||||
|
@ -1180,4 +1215,58 @@ mod test {
|
|||
expected_schema, sorted_schema
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_select() {
|
||||
let schema1 = SchemaBuilder::new()
|
||||
.influx_field("the_field", String)
|
||||
.tag("the_tag")
|
||||
.timestamp()
|
||||
.measurement("the_measurement")
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let projection = schema1.select(&[TIME_COLUMN_NAME]).unwrap();
|
||||
|
||||
let schema2 = schema1.project(&projection);
|
||||
let schema3 = Schema::try_from_arrow(Arc::clone(&schema2.inner)).unwrap();
|
||||
|
||||
assert_eq!(schema1.measurement(), schema2.measurement());
|
||||
assert_eq!(schema1.measurement(), schema3.measurement());
|
||||
|
||||
assert_eq!(schema1.len(), 3);
|
||||
assert_eq!(schema2.len(), 1);
|
||||
assert_eq!(schema3.len(), 1);
|
||||
|
||||
assert_eq!(schema1.inner.fields().len(), 3);
|
||||
assert_eq!(schema2.inner.fields().len(), 1);
|
||||
assert_eq!(schema3.inner.fields().len(), 1);
|
||||
|
||||
let get_type = |x: &Schema, field: &str| -> InfluxColumnType {
|
||||
let idx = x.find_index_of(field).unwrap();
|
||||
x.field(idx).0.unwrap()
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
get_type(&schema1, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
assert_eq!(
|
||||
get_type(&schema2, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
assert_eq!(get_type(&schema1, "the_tag"), InfluxColumnType::Tag);
|
||||
assert_eq!(
|
||||
get_type(&schema1, "the_field"),
|
||||
InfluxColumnType::Field(InfluxFieldType::String)
|
||||
);
|
||||
assert_eq!(
|
||||
get_type(&schema2, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
assert_eq!(
|
||||
get_type(&schema3, TIME_COLUMN_NAME),
|
||||
InfluxColumnType::Timestamp
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -24,6 +24,7 @@ generated_types = { path = "../generated_types" }
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
internal_types = { path = "../internal_types" }
observability_deps = { path = "../observability_deps" }
parking_lot = "0.11.1"
snafu = "0.6.2"
string-interner = "0.12.2"
tokio = { version = "1.0", features = ["macros"] }

@ -1,21 +1,26 @@
|
|||
//! Represents a Chunk of data (a collection of tables and their data within
|
||||
//! some chunk) in the mutable store.
|
||||
use arrow_deps::{arrow::record_batch::RecordBatch, datafusion::logical_plan::Expr};
|
||||
|
||||
use generated_types::wal as wb;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_types::partition_metadata::TableSummary;
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
||||
use arrow_deps::arrow::record_batch::RecordBatch;
|
||||
use data_types::{database_rules::WriterId, partition_metadata::TableSummary};
|
||||
use internal_types::{
|
||||
entry::{ClockValue, TableBatch},
|
||||
selection::Selection,
|
||||
};
|
||||
use tracker::{MemRegistry, MemTracker};
|
||||
|
||||
use crate::chunk::snapshot::ChunkSnapshot;
|
||||
use crate::{
|
||||
column::Column,
|
||||
dictionary::{Dictionary, Error as DictionaryError},
|
||||
pred::{ChunkPredicate, ChunkPredicateBuilder},
|
||||
dictionary::{Dictionary, Error as DictionaryError, DID},
|
||||
table::Table,
|
||||
};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use tracker::{MemRegistry, MemTracker};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
pub mod snapshot;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
|
@ -31,57 +36,12 @@ pub enum Error {
|
|||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error checking predicate in table {}: {}", table_id, source))]
|
||||
PredicateCheck {
|
||||
table_id: u32,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error checking predicate in table '{}': {}", table_name, source))]
|
||||
NamedTablePredicateCheck {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Unsupported predicate when mutable buffer table names. Found a general expression: {:?}",
|
||||
exprs
|
||||
))]
|
||||
PredicateNotYetSupported { exprs: Vec<Expr> },
|
||||
|
||||
#[snafu(display("Table ID {} not found in dictionary of chunk {}", table_id, chunk))]
|
||||
TableIdNotFoundInDictionary {
|
||||
table_id: u32,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Internal error: table {} not found in dictionary of chunk {}",
|
||||
table_name,
|
||||
chunk_id
|
||||
))]
|
||||
InternalTableNotFoundInDictionary { table_name: String, chunk_id: u32 },
|
||||
|
||||
#[snafu(display("Table {} not found in chunk {}", table, chunk))]
|
||||
TableNotFoundInChunk { table: u32, chunk: u64 },
|
||||
|
||||
#[snafu(display("Table '{}' not found in chunk {}", table_name, chunk_id))]
|
||||
NamedTableNotFoundInChunk { table_name: String, chunk_id: u64 },
|
||||
|
||||
#[snafu(display("Attempt to write table batch without a name"))]
|
||||
TableWriteWithoutName,
|
||||
|
||||
#[snafu(display("Value ID {} not found in dictionary of chunk {}", value_id, chunk_id))]
|
||||
InternalColumnValueIdNotFoundInDictionary {
|
||||
value_id: u32,
|
||||
chunk_id: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
TableNotFoundInChunk { table: DID, chunk: u64 },
|
||||
|
||||
#[snafu(display("Column ID {} not found in dictionary of chunk {}", column_id, chunk))]
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: u32,
|
||||
column_id: DID,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
@ -96,12 +56,6 @@ pub enum Error {
|
|||
chunk_id: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column '{}' is not a string tag column and thus can not list values",
|
||||
column_name
|
||||
))]
|
||||
UnsupportedColumnTypeForListingValues { column_name: String },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@@ -109,34 +63,25 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug)]
pub struct Chunk {
    /// The id for this chunk
    pub id: u32,
    id: u32,

    /// `dictionary` maps &str -> u32. The u32s are used in place of String or
    /// `dictionary` maps &str -> DID. The DIDs are used in place of String or
    /// str to avoid slow string operations. The same dictionary is used for
    /// table names, tag names, tag values, and column names.
    // TODO: intern string field values too?
    pub dictionary: Dictionary,
    dictionary: Dictionary,

    /// map of the dictionary ID for the table name to the table
    pub tables: HashMap<u32, Table>,
    tables: HashMap<DID, Table>,

    /// keep track of memory used by chunk
    tracker: MemTracker,
}

impl Clone for Chunk {
    fn clone(&self) -> Self {
        // TODO: The performance of this is not great - (#635)
        let mut ret = Self {
            id: self.id,
            dictionary: self.dictionary.clone(),
            tables: self.tables.clone(),
            tracker: self.tracker.clone_empty(),
        };

        ret.tracker.set_bytes(ret.size());
        ret
    }
    /// Cached chunk snapshot
    ///
    /// Note: This is a mutex to allow mutation within
    /// `Chunk::snapshot()` which only takes an immutable borrow
    snapshot: Mutex<Option<Arc<ChunkSnapshot>>>,
}

impl Chunk {

@@ -146,38 +91,41 @@ impl Chunk {
            dictionary: Dictionary::new(),
            tables: HashMap::new(),
            tracker: memory_registry.register(),
            snapshot: Mutex::new(None),
        };
        chunk.tracker.set_bytes(chunk.size());
        chunk
    }

    pub fn write_entry(&mut self, entry: &wb::WriteBufferEntry<'_>) -> Result<()> {
        if let Some(table_batches) = entry.table_batches() {
            for batch in table_batches {
                self.write_table_batch(&batch)?;
            }
        }
    pub fn write_table_batches(
        &mut self,
        clock_value: ClockValue,
        writer_id: WriterId,
        batches: &[TableBatch<'_>],
    ) -> Result<()> {
        for batch in batches {
            let table_name = batch.name();
            let table_id = self.dictionary.lookup_value_or_insert(table_name);

        self.tracker.set_bytes(self.size());
            let table = self
                .tables
                .entry(table_id)
                .or_insert_with(|| Table::new(table_id));

        Ok(())
    }

    fn write_table_batch(&mut self, batch: &wb::TableWriteBatch<'_>) -> Result<()> {
        let table_name = batch.name().context(TableWriteWithoutName)?;
        let table_id = self.dictionary.lookup_value_or_insert(table_name);

        let table = self
            .tables
            .entry(table_id)
            .or_insert_with(|| Table::new(table_id));

        if let Some(rows) = batch.rows() {
            let columns = batch.columns();
            table
                .append_rows(&mut self.dictionary, &rows)
                .write_columns(&mut self.dictionary, clock_value, writer_id, columns)
                .context(TableWrite { table_name })?;
        }

        // Invalidate chunk snapshot
        *self
            .snapshot
            .try_lock()
            .expect("concurrent readers/writers to MBChunk") = None;

        self.tracker.set_bytes(self.size());

        Ok(())
    }

@ -191,212 +139,17 @@ impl Chunk {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return all the names of the tables names in this chunk that match
|
||||
/// chunk predicate
|
||||
pub fn table_names(&self, chunk_predicate: &ChunkPredicate) -> Result<Vec<&str>> {
|
||||
// we don't support arbitrary expressions in chunk predicate yet
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return PredicateNotYetSupported {
|
||||
exprs: chunk_predicate.chunk_exprs.clone(),
|
||||
}
|
||||
.fail();
|
||||
/// Returns a queryable snapshot of this chunk
|
||||
pub fn snapshot(&self) -> Arc<ChunkSnapshot> {
|
||||
let mut guard = self.snapshot.lock();
|
||||
if let Some(snapshot) = &*guard {
|
||||
return Arc::clone(snapshot);
|
||||
}
|
||||
|
||||
self.tables
|
||||
.iter()
|
||||
.filter_map(|(&table_id, table)| {
|
||||
// could match is good enough for this metadata query
|
||||
match table.could_match_predicate(chunk_predicate) {
|
||||
Ok(true) => Some(self.dictionary.lookup_id(table_id).context(
|
||||
TableIdNotFoundInDictionary {
|
||||
table_id,
|
||||
chunk: self.id,
|
||||
},
|
||||
)),
|
||||
Ok(false) => None,
|
||||
Err(e) => Some(Err(e).context(PredicateCheck { table_id })),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// If the column names that match the predicate can be found
|
||||
/// from the predicate entirely using metadata, return those
|
||||
/// strings.
|
||||
///
|
||||
/// If the predicate cannot be evaluated entirely with
|
||||
/// metadata, return `Ok(None)`.
|
||||
pub fn column_names(
|
||||
&self,
|
||||
table_name: &str,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<Option<BTreeSet<String>>> {
|
||||
// No support for general purpose expressions
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let table_name_id = self.table_name_id(table_name)?;
|
||||
|
||||
let mut chunk_column_ids = BTreeSet::new();
|
||||
|
||||
// Is this table in the chunk?
|
||||
if let Some(table) = self.tables.get(&table_name_id) {
|
||||
for (&column_id, column) in &table.columns {
|
||||
let column_matches_predicate = table
|
||||
.column_matches_predicate(&column, chunk_predicate)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
if column_matches_predicate {
|
||||
chunk_column_ids.insert(column_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only return subset of these selection_cols if not all_cols
|
||||
let mut all_cols = true;
|
||||
let selection_cols = match selection {
|
||||
Selection::All => &[""],
|
||||
Selection::Some(cols) => {
|
||||
all_cols = false;
|
||||
cols
|
||||
}
|
||||
};
|
||||
|
||||
let mut column_names = BTreeSet::new();
|
||||
for &column_id in &chunk_column_ids {
|
||||
let column_name =
|
||||
self.dictionary
|
||||
.lookup_id(column_id)
|
||||
.context(ColumnIdNotFoundInDictionary {
|
||||
column_id,
|
||||
chunk: self.id,
|
||||
})?;
|
||||
|
||||
if !column_names.contains(column_name)
|
||||
&& (all_cols || selection_cols.contains(&column_name))
|
||||
{
|
||||
// only use columns in selection_cols
|
||||
column_names.insert(column_name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(column_names))
|
||||
}
|
||||
|
||||
/// Return the id of the table in the chunk's dictionary
|
||||
fn table_name_id(&self, table_name: &str) -> Result<u32> {
|
||||
self.dictionary
|
||||
.id(table_name)
|
||||
.context(InternalTableNotFoundInDictionary {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the strings of the specified Tag column that satisfy
|
||||
/// the predicate, if they can be determined entirely using metadata.
|
||||
///
|
||||
/// If the predicate cannot be evaluated entirely with metadata,
|
||||
/// return `Ok(None)`.
|
||||
pub fn tag_column_values(
|
||||
&self,
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<Option<BTreeSet<String>>> {
|
||||
// No support for general purpose expressions
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let chunk_id = self.id();
|
||||
|
||||
let table_name_id = self.table_name_id(table_name)?;
|
||||
|
||||
// Is this table even in the chunk?
|
||||
let table = self
|
||||
.tables
|
||||
.get(&table_name_id)
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
// See if we can rule out the table entirely based on metadata
|
||||
let could_match = table
|
||||
.could_match_predicate(chunk_predicate)
|
||||
.context(NamedTablePredicateCheck { table_name })?;
|
||||
|
||||
if !could_match {
|
||||
// No columns could match, return empty set
|
||||
return Ok(Default::default());
|
||||
}
|
||||
|
||||
let column_id =
|
||||
self.dictionary
|
||||
.lookup_value(column_name)
|
||||
.context(ColumnNameNotFoundInDictionary {
|
||||
column_name,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let column = table
|
||||
.column(column_id)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
if let Column::Tag(column, _) = column {
|
||||
// if we have a timestamp predicate, find all values
|
||||
// where the timestamp is within range. Otherwise take
|
||||
// all values.
|
||||
|
||||
// Collect matching ids into BTreeSet to deduplicate on
|
||||
// ids *before* looking up Strings
|
||||
let column_value_ids: BTreeSet<u32> = match chunk_predicate.range {
|
||||
None => {
|
||||
// take all non-null values
|
||||
column.iter().filter_map(|&s| s).collect()
|
||||
}
|
||||
Some(range) => {
|
||||
// filter out all values that don't match the timestamp
|
||||
let time_column = table
|
||||
.column_i64(chunk_predicate.time_column_id)
|
||||
.context(NamedTableError { table_name })?;
|
||||
|
||||
column
|
||||
.iter()
|
||||
.zip(time_column.iter())
|
||||
.filter_map(|(&column_value_id, ×tamp_value)| {
|
||||
if range.contains_opt(timestamp_value) {
|
||||
column_value_id
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
};
|
||||
|
||||
// convert all the (deduplicated) ids to Strings
|
||||
let column_values = column_value_ids
|
||||
.into_iter()
|
||||
.map(|value_id| {
|
||||
let value = self.dictionary.lookup_id(value_id).context(
|
||||
InternalColumnValueIdNotFoundInDictionary { value_id, chunk_id },
|
||||
)?;
|
||||
Ok(value.to_string())
|
||||
})
|
||||
.collect::<Result<BTreeSet<String>>>()?;
|
||||
|
||||
Ok(Some(column_values))
|
||||
} else {
|
||||
UnsupportedColumnTypeForListingValues { column_name }.fail()
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a builder suitable to create predicates for this Chunk
|
||||
pub fn predicate_builder(&self) -> Result<ChunkPredicateBuilder<'_>, crate::pred::Error> {
|
||||
ChunkPredicateBuilder::new(&self.dictionary)
|
||||
// TODO: Incremental snapshot generation
|
||||
let snapshot = Arc::new(ChunkSnapshot::new(self));
|
||||
*guard = Some(Arc::clone(&snapshot));
|
||||
snapshot
|
||||
}
|
||||
|
||||
/// returns true if there is no data in this chunk
|
||||
|
@ -420,7 +173,7 @@ impl Chunk {
|
|||
if let Some(table) = self.table(table_name)? {
|
||||
dst.push(
|
||||
table
|
||||
.to_arrow(&self, selection)
|
||||
.to_arrow(&self.dictionary, selection)
|
||||
.context(NamedTableError { table_name })?,
|
||||
);
|
||||
}
|
||||
|
@ -439,7 +192,7 @@ impl Chunk {
|
|||
|
||||
TableSummary {
|
||||
name: name.to_string(),
|
||||
columns: table.stats(&self),
|
||||
columns: table.stats(&self.dictionary),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
|
@ -459,21 +212,6 @@ impl Chunk {
|
|||
Ok(table)
|
||||
}
|
||||
|
||||
/// Return Schema for the specified table / columns
|
||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||
let table = self
|
||||
.table(table_name)?
|
||||
// Option --> Result
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})?;
|
||||
|
||||
table
|
||||
.schema(self, selection)
|
||||
.context(NamedTableError { table_name })
|
||||
}
|
||||
|
||||
/// Return the approximate memory size of the chunk, in bytes including the
|
||||
/// dictionary, tables, and their rows.
|
||||
pub fn size(&self) -> usize {
|
||||
|
@ -486,3 +224,155 @@ impl Chunk {
|
|||
matches!(self.table(table_name), Ok(Some(_)))
|
||||
}
|
||||
}
|
||||
|
||||
pub mod test_helpers {
    use super::*;
    use internal_types::entry::test_helpers::lp_to_entry;

    /// A helper that will write line protocol string to the passed in Chunk.
    /// All data will be under a single partition with a clock value and
    /// writer id of 0.
    pub fn write_lp_to_chunk(lp: &str, chunk: &mut Chunk) -> Result<()> {
        let entry = lp_to_entry(lp);

        for w in entry.partition_writes().unwrap() {
            chunk.write_table_batches(ClockValue::new(0), 0, &w.table_batches())?;
        }

        Ok(())
    }
}

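A hedged end-to-end sketch, not part of the diff, tying this helper to the new write path and the snapshot module added below; everything used here (Chunk::new, write_lp_to_chunk, snapshot, has_table, read_filter, Selection) appears elsewhere in this file or in chunk/snapshot.rs.

// Hedged sketch: write a line of line protocol, read it back via a snapshot.
let registry = MemRegistry::new();
let mut chunk = Chunk::new(42, &registry);
test_helpers::write_lp_to_chunk("cpu,host=a val=23 1", &mut chunk).unwrap();

let snapshot = chunk.snapshot(); // cached until the next write invalidates it
assert!(snapshot.has_table("cpu"));
let batch = snapshot.read_filter("cpu", Selection::All).unwrap();
assert_eq!(batch.num_rows(), 1);
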
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::test_helpers::write_lp_to_chunk;
|
||||
use super::*;
|
||||
use arrow_deps::arrow::util::pretty::pretty_format_batches;
|
||||
|
||||
#[test]
|
||||
fn writes_table_batches() {
|
||||
let mr = MemRegistry::new();
|
||||
let mut chunk = Chunk::new(1, &mr);
|
||||
|
||||
let lp = vec![
|
||||
"cpu,host=a val=23 1",
|
||||
"cpu,host=b val=2 1",
|
||||
"mem,host=a val=23432i 1",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"cpu",
|
||||
&[
|
||||
"+------+------+-----+",
|
||||
"| host | time | val |",
|
||||
"+------+------+-----+",
|
||||
"| a | 1 | 23 |",
|
||||
"| b | 1 | 2 |",
|
||||
"+------+------+-----+\n",
|
||||
],
|
||||
);
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"mem",
|
||||
&[
|
||||
"+------+------+-------+",
|
||||
"| host | time | val |",
|
||||
"+------+------+-------+",
|
||||
"| a | 1 | 23432 |",
|
||||
"+------+------+-------+\n",
|
||||
],
|
||||
);
|
||||
|
||||
let lp = vec![
|
||||
"cpu,host=c val=11 1",
|
||||
"mem sval=\"hi\" 2",
|
||||
"disk val=true 1",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"cpu",
|
||||
&[
|
||||
"+------+------+-----+",
|
||||
"| host | time | val |",
|
||||
"+------+------+-----+",
|
||||
"| a | 1 | 23 |",
|
||||
"| b | 1 | 2 |",
|
||||
"| c | 1 | 11 |",
|
||||
"+------+------+-----+\n",
|
||||
],
|
||||
);
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"disk",
|
||||
&[
|
||||
"+------+------+",
|
||||
"| time | val |",
|
||||
"+------+------+",
|
||||
"| 1 | true |",
|
||||
"+------+------+\n",
|
||||
],
|
||||
);
|
||||
|
||||
assert_table(
|
||||
&chunk,
|
||||
"mem",
|
||||
&[
|
||||
"+------+------+------+-------+",
|
||||
"| host | sval | time | val |",
|
||||
"+------+------+------+-------+",
|
||||
"| a | | 1 | 23432 |",
|
||||
"| | hi | 2 | |",
|
||||
"+------+------+------+-------+\n",
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_snapshot() {
|
||||
let mr = MemRegistry::new();
|
||||
let mut chunk = Chunk::new(1, &mr);
|
||||
|
||||
let lp = vec![
|
||||
"cpu,host=a val=23 1",
|
||||
"cpu,host=b val=2 1",
|
||||
"mem,host=a val=23432i 1",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
let s1 = chunk.snapshot();
|
||||
let s2 = chunk.snapshot();
|
||||
|
||||
write_lp_to_chunk(&lp, &mut chunk).unwrap();
|
||||
let s3 = chunk.snapshot();
|
||||
let s4 = chunk.snapshot();
|
||||
|
||||
assert_eq!(Arc::as_ptr(&s1), Arc::as_ptr(&s2));
|
||||
assert_ne!(Arc::as_ptr(&s1), Arc::as_ptr(&s3));
|
||||
assert_eq!(Arc::as_ptr(&s3), Arc::as_ptr(&s4));
|
||||
}
|
||||
|
||||
fn assert_table(chunk: &Chunk, table: &str, data: &[&str]) {
|
||||
let mut batches = vec![];
|
||||
chunk
|
||||
.table_to_arrow(&mut batches, table, Selection::All)
|
||||
.unwrap();
|
||||
let res = pretty_format_batches(&batches).unwrap();
|
||||
let data = data.join("\n");
|
||||
assert_eq!(
|
||||
res, data,
|
||||
"\n{} table results not as expected:\nEXPECTED:\n{}\nRECEIVED:\n{}",
|
||||
table, data, res
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,181 @@
|
|||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_deps::arrow::record_batch::RecordBatch;
|
||||
use data_types::timestamp::TimestampRange;
|
||||
use internal_types::schema::{Schema, TIME_COLUMN_NAME};
|
||||
use internal_types::selection::Selection;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
|
||||
use super::Chunk;
|
||||
|
||||
#[derive(Debug, Snafu)]
pub enum Error {
    #[snafu(display("Table not found: {}", table_name))]
    TableNotFound { table_name: String },

    #[snafu(display("Failed to select columns: {}", source))]
    SelectColumns {
        source: internal_types::schema::Error,
    },
}

pub type Result<T, E = Error> = std::result::Result<T, E>;

/// A queryable snapshot of a mutable buffer chunk
#[derive(Debug)]
pub struct ChunkSnapshot {
    /// The ID of the chunk this is a snapshot of
    chunk_id: u32,

    /// Maps table name to `TableSnapshot`
    records: HashMap<String, TableSnapshot>,
    // TODO: Memory tracking
}

#[derive(Debug)]
struct TableSnapshot {
    schema: Schema,
    batch: RecordBatch,
    timestamp_range: Option<TimestampRange>,
}

impl TableSnapshot {
    fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
        match (self.timestamp_range, timestamp_range) {
            (Some(a), Some(b)) => !a.disjoint(b),
            // If this chunk doesn't have a time column it can't match the predicate
            (None, Some(_)) => false,
            (_, None) => true,
        }
    }
}

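To make the overlap rule above concrete, here is a small hedged sketch that is not part of the new file, assuming `TimestampRange::disjoint` takes its argument by reference as it appears to in `matches_predicate`:

// Data observed at t in [100, 200] is stored as the half-open range [100, 201).
let table_range = TimestampRange::new(100, 201);
// A query over [150, 160) overlaps, so the table stays a candidate...
assert!(!table_range.disjoint(&TimestampRange::new(150, 160)));
// ...while a query over [300, 400) is disjoint and the table is pruned.
assert!(table_range.disjoint(&TimestampRange::new(300, 400)));
// A table without a time column reports `false` for any time-bounded query.
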
impl ChunkSnapshot {
|
||||
pub fn new(chunk: &Chunk) -> Self {
|
||||
let mut records: HashMap<String, TableSnapshot> = Default::default();
|
||||
for (id, table) in &chunk.tables {
|
||||
let schema = table.schema(&chunk.dictionary, Selection::All).unwrap();
|
||||
let batch = table.to_arrow(&chunk.dictionary, Selection::All).unwrap();
|
||||
let name = chunk.dictionary.lookup_id(*id).unwrap();
|
||||
|
||||
let timestamp_range = chunk
|
||||
.dictionary
|
||||
.lookup_value(TIME_COLUMN_NAME)
|
||||
.ok()
|
||||
.and_then(|column_id| {
|
||||
table.column(column_id).ok().and_then(|column| {
|
||||
// TimestampRange has an exclusive upper bound
|
||||
column
|
||||
.get_i64_stats()
|
||||
.map(|x| TimestampRange::new(x.min, x.max + 1))
|
||||
})
|
||||
});
|
||||
|
||||
records.insert(
|
||||
name.to_string(),
|
||||
TableSnapshot {
|
||||
batch,
|
||||
schema,
|
||||
timestamp_range,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Self {
|
||||
chunk_id: chunk.id,
|
||||
records,
|
||||
}
|
||||
}
|
||||
|
||||
/// return the ID of the chunk this is a snapshot of
|
||||
pub fn chunk_id(&self) -> u32 {
|
||||
self.chunk_id
|
||||
}
|
||||
|
||||
/// returns true if there is no data in this snapshot
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.records.is_empty()
|
||||
}
|
||||
|
||||
/// Return true if this snapshot has the specified table name
|
||||
pub fn has_table(&self, table_name: &str) -> bool {
|
||||
self.records.get(table_name).is_some()
|
||||
}
|
||||
|
||||
/// Return Schema for the specified table / columns
|
||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||
let table = self
|
||||
.records
|
||||
.get(table_name)
|
||||
.context(TableNotFound { table_name })?;
|
||||
|
||||
Ok(match selection {
|
||||
Selection::All => table.schema.clone(),
|
||||
Selection::Some(columns) => {
|
||||
let columns = table.schema.select(columns).context(SelectColumns)?;
|
||||
table.schema.project(&columns)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a list of tables with writes matching the given timestamp_range
|
||||
pub fn table_names(
|
||||
&self,
|
||||
timestamp_range: Option<TimestampRange>,
|
||||
) -> impl Iterator<Item = &String> + '_ {
|
||||
self.records
|
||||
.iter()
|
||||
.flat_map(move |(table_name, table_snapshot)| {
|
||||
match table_snapshot.matches_predicate(×tamp_range) {
|
||||
true => Some(table_name),
|
||||
false => None,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a RecordBatch with the given selection
|
||||
pub fn read_filter(&self, table_name: &str, selection: Selection<'_>) -> Result<RecordBatch> {
|
||||
let table = self
|
||||
.records
|
||||
.get(table_name)
|
||||
.context(TableNotFound { table_name })?;
|
||||
|
||||
Ok(match selection {
|
||||
Selection::All => table.batch.clone(),
|
||||
Selection::Some(columns) => {
|
||||
let projection = table.schema.select(columns).context(SelectColumns)?;
|
||||
let schema = table.schema.project(&projection).into();
|
||||
let columns = projection
|
||||
.into_iter()
|
||||
.map(|x| Arc::clone(table.batch.column(x)))
|
||||
.collect();
|
||||
|
||||
RecordBatch::try_new(schema, columns).expect("failed to project record batch")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a given selection of column names from a table
|
||||
pub fn column_names(
|
||||
&self,
|
||||
table_name: &str,
|
||||
selection: Selection<'_>,
|
||||
) -> Option<BTreeSet<String>> {
|
||||
let table = self.records.get(table_name)?;
|
||||
let fields = table.schema.inner().fields().iter();
|
||||
|
||||
Some(match selection {
|
||||
Selection::Some(cols) => fields
|
||||
.filter_map(|x| {
|
||||
if cols.contains(&x.name().as_str()) {
|
||||
Some(x.name().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
Selection::All => fields.map(|x| x.name().clone()).collect(),
|
||||
})
|
||||
}
|
||||
}
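A hedged sketch of how a caller might drive this API, not part of the diff; `snapshot` is assumed to be an `Arc<ChunkSnapshot>` obtained from `Chunk::snapshot()`, and the selected columns are assumed to exist in each matching table.

// Hedged sketch: prune tables by time range, then project two columns.
let range = Some(TimestampRange::new(0, 100));
for table_name in snapshot.table_names(range) {
    let batch = snapshot
        .read_filter(table_name, Selection::Some(&["host", "time"]))
        .unwrap();
    assert_eq!(batch.num_columns(), 2);
}
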
|
|
@ -1,10 +1,9 @@
|
|||
use generated_types::wal as wb;
|
||||
use snafu::Snafu;
|
||||
|
||||
use crate::dictionary::Dictionary;
|
||||
use arrow_deps::arrow::datatypes::DataType as ArrowDataType;
|
||||
use crate::dictionary::{Dictionary, DID};
|
||||
use data_types::partition_metadata::StatValues;
|
||||
use internal_types::data::type_description;
|
||||
use generated_types::entry::LogicalColumnType;
|
||||
use internal_types::entry::TypedValuesIterator;
|
||||
|
||||
use std::mem;
|
||||
|
||||
|
@ -37,80 +36,276 @@ pub enum Column {
|
|||
U64(Vec<Option<u64>>, StatValues<u64>),
|
||||
String(Vec<Option<String>>, StatValues<String>),
|
||||
Bool(Vec<Option<bool>>, StatValues<bool>),
|
||||
Tag(Vec<Option<u32>>, StatValues<String>),
|
||||
Tag(Vec<Option<DID>>, StatValues<String>),
|
||||
}
|
||||
|
||||
impl Column {
|
||||
pub fn with_value(
|
||||
/// Initializes a new column from typed values, the column on a table write
|
||||
/// batch on an Entry. Will initialize the stats with the first
|
||||
/// non-null value and update with any other non-null values included.
|
||||
pub fn new_from_typed_values(
|
||||
dictionary: &mut Dictionary,
|
||||
capacity: usize,
|
||||
value: wb::Value<'_>,
|
||||
) -> Result<Self> {
|
||||
Ok(match value.value_type() {
|
||||
wb::ColumnValue::F64Value => {
|
||||
let val = value
|
||||
.value_as_f64value()
|
||||
.expect("f64 value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::F64(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::I64Value => {
|
||||
let val = value
|
||||
.value_as_i64value()
|
||||
.expect("i64 value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::I64(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::U64Value => {
|
||||
let val = value
|
||||
.value_as_u64value()
|
||||
.expect("u64 value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::U64(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::StringValue => {
|
||||
let val = value
|
||||
.value_as_string_value()
|
||||
.expect("string value should be present")
|
||||
.value()
|
||||
.expect("string must be present");
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val.to_string()));
|
||||
Self::String(vals, StatValues::new(val.to_string()))
|
||||
}
|
||||
wb::ColumnValue::BoolValue => {
|
||||
let val = value
|
||||
.value_as_bool_value()
|
||||
.expect("bool value should be present")
|
||||
.value();
|
||||
let mut vals = vec![None; capacity];
|
||||
vals.push(Some(val));
|
||||
Self::Bool(vals, StatValues::new(val))
|
||||
}
|
||||
wb::ColumnValue::TagValue => {
|
||||
let val = value
|
||||
.value_as_tag_value()
|
||||
.expect("tag value should be present")
|
||||
.value()
|
||||
.expect("tag value must have string value");
|
||||
let mut vals = vec![None; capacity];
|
||||
let id = dictionary.lookup_value_or_insert(val);
|
||||
vals.push(Some(id));
|
||||
Self::Tag(vals, StatValues::new(val.to_string()))
|
||||
}
|
||||
_ => {
|
||||
return UnknownColumnType {
|
||||
inserted_value_type: type_description(value.value_type()),
|
||||
row_count: usize,
|
||||
logical_type: LogicalColumnType,
|
||||
values: TypedValuesIterator<'_>,
|
||||
) -> Self {
|
||||
match values {
|
||||
TypedValuesIterator::String(vals) => match logical_type {
|
||||
LogicalColumnType::Tag => {
|
||||
let mut tag_values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<String>> = None;
|
||||
|
||||
let mut added_tag_values: Vec<_> = vals
|
||||
.map(|tag| {
|
||||
tag.map(|tag| {
|
||||
match stats.as_mut() {
|
||||
Some(s) => StatValues::update_string(s, tag),
|
||||
None => {
|
||||
stats = Some(StatValues::new(tag.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
dictionary.lookup_value_or_insert(tag)
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
tag_values.append(&mut added_tag_values);
|
||||
|
||||
Self::Tag(
|
||||
tag_values,
|
||||
stats.expect("can't insert tag column with no values"),
|
||||
)
|
||||
}
|
||||
.fail()
|
||||
LogicalColumnType::Field => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<String>> = None;
|
||||
|
||||
for value in vals {
|
||||
match value {
|
||||
Some(v) => {
|
||||
match stats.as_mut() {
|
||||
Some(s) => StatValues::update_string(s, v),
|
||||
None => stats = Some(StatValues::new(v.to_string())),
|
||||
}
|
||||
|
||||
values.push(Some(v.to_string()));
|
||||
}
|
||||
None => values.push(None),
|
||||
}
|
||||
}
|
||||
|
||||
Self::String(
|
||||
values,
|
||||
stats.expect("can't insert string column with no values"),
|
||||
)
|
||||
}
|
||||
_ => panic!("unsupported!"),
|
||||
},
|
||||
TypedValuesIterator::I64(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<i64>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::I64(
|
||||
values,
|
||||
stats.expect("can't insert i64 column with no values"),
|
||||
)
|
||||
}
|
||||
})
|
||||
TypedValuesIterator::F64(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<f64>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::F64(
|
||||
values,
|
||||
stats.expect("can't insert f64 column with no values"),
|
||||
)
|
||||
}
|
||||
TypedValuesIterator::U64(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<u64>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::U64(
|
||||
values,
|
||||
stats.expect("can't insert u64 column with no values"),
|
||||
)
|
||||
}
|
||||
TypedValuesIterator::Bool(vals) => {
|
||||
let mut values = vec![None; row_count];
|
||||
let mut stats: Option<StatValues<bool>> = None;
|
||||
|
||||
for v in vals {
|
||||
if let Some(val) = v {
|
||||
match stats.as_mut() {
|
||||
Some(s) => s.update(val),
|
||||
None => stats = Some(StatValues::new(val)),
|
||||
}
|
||||
}
|
||||
values.push(v);
|
||||
}
|
||||
|
||||
Self::Bool(
|
||||
values,
|
||||
stats.expect("can't insert bool column with no values"),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes typed values, the column from a table write batch on an Entry.
|
||||
/// Updates statistics for any non-null values.
|
||||
pub fn push_typed_values(
|
||||
&mut self,
|
||||
dictionary: &mut Dictionary,
|
||||
logical_type: LogicalColumnType,
|
||||
values: TypedValuesIterator<'_>,
|
||||
) -> Result<()> {
|
||||
match (self, values) {
|
||||
(Self::Bool(col, stats), TypedValuesIterator::Bool(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::I64(col, stats), TypedValuesIterator::I64(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::F64(col, stats), TypedValuesIterator::F64(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::U64(col, stats), TypedValuesIterator::U64(values)) => {
|
||||
for val in values {
|
||||
if let Some(v) = val {
|
||||
stats.update(v)
|
||||
};
|
||||
col.push(val);
|
||||
}
|
||||
}
|
||||
(Self::String(col, stats), TypedValuesIterator::String(values)) => {
|
||||
if logical_type != LogicalColumnType::Field {
|
||||
TypeMismatch {
|
||||
existing_column_type: "String",
|
||||
inserted_value_type: "tag",
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
|
||||
for val in values {
|
||||
match val {
|
||||
Some(v) => {
|
||||
StatValues::update_string(stats, v);
|
||||
col.push(Some(v.to_string()));
|
||||
}
|
||||
None => col.push(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
(Self::Tag(col, stats), TypedValuesIterator::String(values)) => {
|
||||
if logical_type != LogicalColumnType::Tag {
|
||||
TypeMismatch {
|
||||
existing_column_type: "tag",
|
||||
inserted_value_type: "String",
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
|
||||
for val in values {
|
||||
match val {
|
||||
Some(v) => {
|
||||
StatValues::update_string(stats, v);
|
||||
let id = dictionary.lookup_value_or_insert(v);
|
||||
col.push(Some(id));
|
||||
}
|
||||
None => col.push(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
(existing, values) => TypeMismatch {
|
||||
existing_column_type: existing.type_description(),
|
||||
inserted_value_type: values.type_description(),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Pushes None values onto the column until its len is equal to that passed
|
||||
/// in
|
||||
pub fn push_nulls_to_len(&mut self, len: usize) {
|
||||
match self {
|
||||
Self::Tag(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::I64(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::F64(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::U64(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::Bool(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
Self::String(vals, _) => {
|
||||
if len > vals.len() {
|
||||
vals.resize(len, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
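A hedged sketch of the padding behaviour described above (not part of the diff), constructing a column variant directly the way the tests at the bottom of this file do:

// Hedged sketch: pad a two-value column out to 5 rows with trailing NULLs.
let mut col = Column::I64(vec![Some(1), Some(2)], StatValues::new(1));
col.push_nulls_to_len(5);
assert_eq!(col.len(), 5);
// A shorter target length is a no-op; the column is never truncated.
col.push_nulls_to_len(3);
assert_eq!(col.len(), 5);
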
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
|
@@ -124,10 +319,6 @@ impl Column {
        }
    }

    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    pub fn type_description(&self) -> &'static str {
        match self {
            Self::F64(_, _) => "f64",

@ -139,167 +330,10 @@ impl Column {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return the arrow DataType for this column
|
||||
pub fn data_type(&self) -> ArrowDataType {
|
||||
pub fn get_i64_stats(&self) -> Option<StatValues<i64>> {
|
||||
match self {
|
||||
Self::F64(..) => ArrowDataType::Float64,
|
||||
Self::I64(..) => ArrowDataType::Int64,
|
||||
Self::U64(..) => ArrowDataType::UInt64,
|
||||
Self::String(..) => ArrowDataType::Utf8,
|
||||
Self::Bool(..) => ArrowDataType::Boolean,
|
||||
Self::Tag(..) => ArrowDataType::Utf8,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, dictionary: &mut Dictionary, value: &wb::Value<'_>) -> Result<()> {
|
||||
let inserted = match self {
|
||||
Self::Tag(vals, stats) => match value.value_as_tag_value() {
|
||||
Some(tag) => {
|
||||
let tag_value = tag.value().expect("tag must have string value");
|
||||
let id = dictionary.lookup_value_or_insert(tag_value);
|
||||
vals.push(Some(id));
|
||||
StatValues::update_string(stats, tag_value);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::String(vals, stats) => match value.value_as_string_value() {
|
||||
Some(str_val) => {
|
||||
let str_val = str_val.value().expect("string must have value");
|
||||
vals.push(Some(str_val.to_string()));
|
||||
StatValues::update_string(stats, str_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::Bool(vals, stats) => match value.value_as_bool_value() {
|
||||
Some(bool_val) => {
|
||||
let bool_val = bool_val.value();
|
||||
vals.push(Some(bool_val));
|
||||
stats.update(bool_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::I64(vals, stats) => match value.value_as_i64value() {
|
||||
Some(i64_val) => {
|
||||
let i64_val = i64_val.value();
|
||||
vals.push(Some(i64_val));
|
||||
stats.update(i64_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::U64(vals, stats) => match value.value_as_u64value() {
|
||||
Some(u64_val) => {
|
||||
let u64_val = u64_val.value();
|
||||
vals.push(Some(u64_val));
|
||||
stats.update(u64_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
Self::F64(vals, stats) => match value.value_as_f64value() {
|
||||
Some(f64_val) => {
|
||||
let f64_val = f64_val.value();
|
||||
vals.push(Some(f64_val));
|
||||
stats.update(f64_val);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
},
|
||||
};
|
||||
|
||||
if inserted {
|
||||
Ok(())
|
||||
} else {
|
||||
TypeMismatch {
|
||||
existing_column_type: self.type_description(),
|
||||
inserted_value_type: type_description(value.value_type()),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
// push_none_if_len_equal will add a None value to the end of the Vec of values
|
||||
// if the length is equal to the passed in value. This is used to ensure
|
||||
// columns are all the same length.
|
||||
pub fn push_none_if_len_equal(&mut self, len: usize) {
|
||||
match self {
|
||||
Self::F64(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::I64(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::U64(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::String(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::Bool(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
Self::Tag(v, _) => {
|
||||
if v.len() == len {
|
||||
v.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if any rows are within the range [min_value,
|
||||
/// max_value). Inclusive of `start`, exclusive of `end`
|
||||
pub fn has_i64_range(&self, start: i64, end: i64) -> Result<bool> {
|
||||
match self {
|
||||
Self::I64(_, stats) => {
|
||||
if stats.max < start || stats.min >= end {
|
||||
Ok(false)
|
||||
} else {
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
_ => InternalTypeMismatchForTimePredicate {}.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true of this column's type is a Tag
|
||||
pub fn is_tag(&self) -> bool {
|
||||
matches!(self, Self::Tag(..))
|
||||
}
|
||||
|
||||
/// Returns true if there exists at least one row idx where this
|
||||
/// self[i] is within the range [min_value, max_value). Inclusive
|
||||
/// of `start`, exclusive of `end` and where col[i] is non null
|
||||
pub fn has_non_null_i64_range<T>(
|
||||
&self,
|
||||
column: &[Option<T>],
|
||||
start: i64,
|
||||
end: i64,
|
||||
) -> Result<bool> {
|
||||
match self {
|
||||
Self::I64(v, _) => {
|
||||
for (index, val) in v.iter().enumerate() {
|
||||
if let Some(val) = val {
|
||||
if start <= *val && *val < end && column[index].is_some() {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
_ => InternalTypeMismatchForTimePredicate {}.fail(),
|
||||
Self::I64(_, values) => Some(values.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -322,7 +356,7 @@ impl Column {
|
|||
mem::size_of::<Option<bool>>() * v.len() + mem::size_of_val(&stats)
|
||||
}
|
||||
Self::Tag(v, stats) => {
|
||||
mem::size_of::<Option<u32>>() * v.len() + mem::size_of_val(&stats)
|
||||
mem::size_of::<Option<DID>>() * v.len() + mem::size_of_val(&stats)
|
||||
}
|
||||
Self::String(v, stats) => {
|
||||
let string_bytes_size = v
|
||||
|
@ -334,89 +368,3 @@ impl Column {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_has_i64_range() {
|
||||
let mut stats = StatValues::new(1);
|
||||
stats.update(2);
|
||||
let col = Column::I64(vec![Some(1), None, Some(2)], stats.clone());
|
||||
assert!(!col.has_i64_range(-1, 0).unwrap());
|
||||
assert!(!col.has_i64_range(0, 1).unwrap());
|
||||
assert!(col.has_i64_range(1, 2).unwrap());
|
||||
assert!(col.has_i64_range(2, 3).unwrap());
|
||||
assert!(!col.has_i64_range(3, 4).unwrap());
|
||||
|
||||
let col = Column::I64(vec![Some(2), None, Some(1)], stats);
|
||||
assert!(!col.has_i64_range(-1, 0).unwrap());
|
||||
assert!(!col.has_i64_range(0, 1).unwrap());
|
||||
assert!(col.has_i64_range(1, 2).unwrap());
|
||||
assert!(col.has_i64_range(2, 3).unwrap());
|
||||
assert!(!col.has_i64_range(3, 4).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_i64_range_does_not_panic() {
|
||||
// providing the wrong column type should get an internal error, not a panic
|
||||
let col = Column::F64(vec![Some(1.2)], StatValues::new(1.2));
|
||||
let res = col.has_i64_range(-1, 0);
|
||||
assert!(res.is_err());
|
||||
let res_string = format!("{:?}", res);
|
||||
let expected = "InternalTypeMismatchForTimePredicate";
|
||||
assert!(
|
||||
res_string.contains(expected),
|
||||
"Did not find expected text '{}' in '{}'",
|
||||
expected,
|
||||
res_string
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_non_null_i64_range_() {
|
||||
let none_col: Vec<Option<u32>> = vec![None, None, None];
|
||||
let some_col: Vec<Option<u32>> = vec![Some(0), Some(0), Some(0)];
|
||||
|
||||
let mut stats = StatValues::new(1);
|
||||
stats.update(2);
|
||||
let col = Column::I64(vec![Some(1), None, Some(2)], stats);
|
||||
|
||||
assert!(!col.has_non_null_i64_range(&some_col, -1, 0).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&some_col, 0, 1).unwrap());
|
||||
assert!(col.has_non_null_i64_range(&some_col, 1, 2).unwrap());
|
||||
assert!(col.has_non_null_i64_range(&some_col, 2, 3).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&some_col, 3, 4).unwrap());
|
||||
|
||||
assert!(!col.has_non_null_i64_range(&none_col, -1, 0).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 0, 1).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 1, 2).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 2, 3).unwrap());
|
||||
assert!(!col.has_non_null_i64_range(&none_col, 3, 4).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn column_size() {
|
||||
let i64col = Column::I64(vec![Some(1), Some(1)], StatValues::new(1));
|
||||
assert_eq!(40, i64col.size());
|
||||
|
||||
let f64col = Column::F64(vec![Some(1.1), Some(1.1), Some(1.1)], StatValues::new(1.1));
|
||||
assert_eq!(56, f64col.size());
|
||||
|
||||
let boolcol = Column::Bool(vec![Some(true)], StatValues::new(true));
|
||||
assert_eq!(9, boolcol.size());
|
||||
|
||||
let tagcol = Column::Tag(
|
||||
vec![Some(1), Some(1), Some(1), Some(1)],
|
||||
StatValues::new("foo".to_string()),
|
||||
);
|
||||
assert_eq!(40, tagcol.size());
|
||||
|
||||
let stringcol = Column::String(
|
||||
vec![Some("foo".to_string()), Some("hello world".to_string())],
|
||||
StatValues::new("foo".to_string()),
|
||||
);
|
||||
assert_eq!(70, stringcol.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ use string_interner::{
|
|||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Dictionary lookup error on id {}", id))]
|
||||
DictionaryIdLookupError { id: u32 },
|
||||
DictionaryIdLookupError { id: DID },
|
||||
|
||||
#[snafu(display("Dictionary lookup error for value {}", value))]
|
||||
DictionaryValueLookupError { value: String },
|
||||
|
@ -16,6 +16,30 @@ pub enum Error {
|
|||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// A "dictionary ID" (DID) is a compact numeric representation of an interned
|
||||
/// string in the dictionary. The same string always maps to the same DID. DIDs can
|
||||
/// be compared, hashed and cheaply copied around, just like small integers.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub struct DID(DefaultSymbol);
|
||||
|
||||
impl DID {
|
||||
fn new(s: DefaultSymbol) -> Self {
|
||||
Self(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DID> for DefaultSymbol {
|
||||
fn from(id: DID) -> Self {
|
||||
id.0
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for DID {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0.to_usize())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Dictionary {
|
||||
interner: StringInterner<DefaultSymbol, StringBackend<DefaultSymbol>, DefaultHashBuilder>,
|
||||
|
@ -39,43 +63,37 @@ impl Dictionary {
|
|||
|
||||
/// Returns the id corresponding to value, adding an entry for the
|
||||
/// value if it is not yet present in the dictionary.
|
||||
pub fn lookup_value_or_insert(&mut self, value: &str) -> u32 {
|
||||
pub fn lookup_value_or_insert(&mut self, value: &str) -> DID {
|
||||
self.id(value).unwrap_or_else(|| {
|
||||
self.size += value.len();
|
||||
self.size += std::mem::size_of::<u32>();
|
||||
symbol_to_u32(self.interner.get_or_intern(value))
|
||||
DID::new(self.interner.get_or_intern(value))
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the ID in self.dictionary that corresponds to `value`, if any.
|
||||
/// Returns an error if no such value is found. Does not add the value
|
||||
/// to the dictionary.
|
||||
pub fn lookup_value(&self, value: &str) -> Result<u32> {
|
||||
pub fn lookup_value(&self, value: &str) -> Result<DID> {
|
||||
self.id(value).context(DictionaryValueLookupError { value })
|
||||
}
|
||||
|
||||
/// Returns the ID in self.dictionary that corresponds to `value`,
|
||||
/// if any. No error is returned to avoid an allocation when no value is
|
||||
/// present
|
||||
pub fn id(&self, value: &str) -> Option<u32> {
|
||||
self.interner.get(value).map(symbol_to_u32)
|
||||
pub fn id(&self, value: &str) -> Option<DID> {
|
||||
self.interner.get(value).map(DID::new)
|
||||
}
|
||||
|
||||
/// Returns the str in self.dictionary that corresponds to `id`,
|
||||
/// if any. Returns an error if no such id is found
|
||||
pub fn lookup_id(&self, id: u32) -> Result<&str> {
|
||||
let symbol =
|
||||
Symbol::try_from_usize(id as usize).expect("to be able to convert u32 to symbol");
|
||||
pub fn lookup_id(&self, id: DID) -> Result<&str> {
|
||||
self.interner
|
||||
.resolve(symbol)
|
||||
.resolve(id.into())
|
||||
.context(DictionaryIdLookupError { id })
|
||||
}
|
||||
}
|
||||
|
||||
fn symbol_to_u32(sym: DefaultSymbol) -> u32 {
|
||||
sym.to_usize() as u32
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::dictionary::Dictionary;
|
||||
|
|
|
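Since the diff above replaces raw u32 ids with the DID newtype throughout the dictionary API, a short usage sketch may help. It assumes only what is shown above plus the `Dictionary::new()` constructor used by the tests further down, and would live inside the same crate:

// Sketch of the DID-based API: interning is idempotent and a DID round-trips
// back to the original string via lookup_id.
use crate::dictionary::Dictionary;

fn demo() {
    let mut dict = Dictionary::new();

    let a = dict.lookup_value_or_insert("cpu");
    let b = dict.lookup_value_or_insert("cpu");
    assert_eq!(a, b); // same string, same DID

    assert_eq!(dict.lookup_id(a).unwrap(), "cpu"); // DID -> &str
    assert!(dict.id("never_interned").is_none());  // no error, no allocation
}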
@ -60,5 +60,4 @@
|
|||
pub mod chunk;
|
||||
mod column;
|
||||
mod dictionary;
|
||||
pub mod pred;
|
||||
mod table;
|
||||
|
|
|
@ -1,298 +0,0 @@
|
|||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use crate::dictionary::{Dictionary, Error as DictionaryError};
|
||||
|
||||
use arrow_deps::{
|
||||
datafusion::{
|
||||
error::{DataFusionError, Result as DatafusionResult},
|
||||
logical_plan::{Expr, ExpressionVisitor, Operator, Recursion},
|
||||
optimizer::utils::expr_to_column_names,
|
||||
},
|
||||
util::{make_range_expr, AndExprBuilder},
|
||||
};
|
||||
use data_types::timestamp::TimestampRange;
|
||||
use internal_types::schema::TIME_COLUMN_NAME;
|
||||
|
||||
//use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use snafu::{ensure, ResultExt, Snafu};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error writing table '{}': {}", table_name, source))]
|
||||
TableWrite {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Time Column was not not found in dictionary: {}", source))]
|
||||
TimeColumnNotFound { source: DictionaryError },
|
||||
|
||||
#[snafu(display("Unsupported predicate. Mutable buffer does not support: {}", source))]
|
||||
UnsupportedPredicate { source: DataFusionError },
|
||||
|
||||
#[snafu(display(
|
||||
"Internal error visiting expressions in ChunkPredicateBuilder: {}",
|
||||
source
|
||||
))]
|
||||
InternalVisitingExpressions { source: DataFusionError },
|
||||
|
||||
#[snafu(display("table_names has already been specified in ChunkPredicateBuilder"))]
|
||||
TableNamesAlreadySet {},
|
||||
|
||||
#[snafu(display("field_names has already been specified in ChunkPredicateBuilder"))]
|
||||
FieldNamesAlreadySet {},
|
||||
|
||||
#[snafu(display("range has already been specified in ChunkPredicateBuilder"))]
|
||||
RangeAlreadySet {},
|
||||
|
||||
#[snafu(display("exprs has already been specified in ChunkPredicateBuilder"))]
|
||||
ExprsAlreadySet {},
|
||||
|
||||
#[snafu(display("required_columns has already been specified in ChunkPredicateBuilder"))]
|
||||
RequiredColumnsAlreadySet {},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Describes the result of translating a set of strings into
|
||||
/// chunk specific ids
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum ChunkIdSet {
|
||||
/// At least one of the strings was not present in the chunks'
|
||||
/// dictionary.
|
||||
///
|
||||
/// This is important when testing for the presence of all ids in
|
||||
/// a set, as we know they can not all be present
|
||||
AtLeastOneMissing,
|
||||
|
||||
/// All strings existed in this chunk's dictionary
|
||||
Present(BTreeSet<u32>),
|
||||
}
|
||||
|
||||
/// a 'Compiled' set of predicates / filters that can be evaluated on
|
||||
/// this chunk (where strings have been translated to chunk
|
||||
/// specific u32 ids)
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ChunkPredicate {
|
||||
/// If present, restrict the request to just those tables whose
|
||||
/// names are in table_names. If present but empty, means there
|
||||
/// was a predicate but no tables named that way exist in the
|
||||
/// chunk (so no table can pass)
|
||||
pub table_name_predicate: Option<BTreeSet<u32>>,
|
||||
|
||||
/// Optional column restriction. If present, further
|
||||
/// restrict any field columns returned to only those named, and
|
||||
/// skip tables entirely when querying metadata that do not have
|
||||
/// *any* of the fields
|
||||
pub field_name_predicate: Option<BTreeSet<u32>>,
|
||||
|
||||
/// General DataFusion expressions (arbitrary predicates) applied
|
||||
/// as a filter using logical conjunction (aka are 'AND'ed
|
||||
/// together). Only rows that evaluate to TRUE for all these
|
||||
/// expressions should be returned.
|
||||
///
|
||||
/// TODO these exprs should eventually be removed (when they are
|
||||
/// all handled one layer up in the query layer)
|
||||
pub chunk_exprs: Vec<Expr>,
|
||||
|
||||
/// If Some, then the table must contain all columns specified
|
||||
/// to pass the predicate
|
||||
pub required_columns: Option<ChunkIdSet>,
|
||||
|
||||
/// The id of the "time" column in this chunk
|
||||
pub time_column_id: u32,
|
||||
|
||||
/// Timestamp range: only rows within this range should be considered
|
||||
pub range: Option<TimestampRange>,
|
||||
}
|
||||
|
||||
impl ChunkPredicate {
|
||||
/// Creates and adds a DataFusion predicate representing the
|
||||
/// combination of predicate and timestamp.
|
||||
pub fn filter_expr(&self) -> Option<Expr> {
|
||||
// build up a list of expressions
|
||||
let mut builder =
|
||||
AndExprBuilder::default().append_opt(self.make_timestamp_predicate_expr());
|
||||
|
||||
for expr in &self.chunk_exprs {
|
||||
builder = builder.append_expr(expr.clone());
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// For plans which select a subset of fields, returns true if
|
||||
/// the field should be included in the results
|
||||
pub fn should_include_field(&self, field_id: u32) -> bool {
|
||||
match &self.field_name_predicate {
|
||||
None => true,
|
||||
Some(field_restriction) => field_restriction.contains(&field_id),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if this column is the time column
|
||||
pub fn is_time_column(&self, id: u32) -> bool {
|
||||
self.time_column_id == id
|
||||
}
|
||||
|
||||
/// Creates a DataFusion predicate for applying a timestamp range:
|
||||
///
|
||||
/// `range.start <= time and time < range.end`
|
||||
fn make_timestamp_predicate_expr(&self) -> Option<Expr> {
|
||||
self.range
|
||||
.map(|range| make_range_expr(range.start, range.end, TIME_COLUMN_NAME))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds ChunkPredicates
|
||||
#[derive(Debug)]
|
||||
pub struct ChunkPredicateBuilder<'a> {
|
||||
inner: ChunkPredicate,
|
||||
dictionary: &'a Dictionary,
|
||||
}
|
||||
|
||||
impl<'a> ChunkPredicateBuilder<'a> {
|
||||
pub fn new(dictionary: &'a Dictionary) -> Result<Self> {
|
||||
let time_column_id = dictionary
|
||||
.lookup_value(TIME_COLUMN_NAME)
|
||||
.context(TimeColumnNotFound)?;
|
||||
|
||||
let inner = ChunkPredicate {
|
||||
time_column_id,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
Ok(Self { inner, dictionary })
|
||||
}
|
||||
|
||||
/// Set table_name_predicate so only tables in `names` are returned
|
||||
pub fn table_names(mut self, names: Option<&BTreeSet<String>>) -> Result<Self> {
|
||||
ensure!(
|
||||
self.inner.table_name_predicate.is_none(),
|
||||
TableNamesAlreadySet
|
||||
);
|
||||
self.inner.table_name_predicate = self.compile_string_list(names);
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set field_name_predicate so only fields in `names` are returned
|
||||
pub fn field_names(mut self, names: Option<&BTreeSet<String>>) -> Result<Self> {
|
||||
ensure!(
|
||||
self.inner.field_name_predicate.is_none(),
|
||||
FieldNamesAlreadySet
|
||||
);
|
||||
self.inner.field_name_predicate = self.compile_string_list(names);
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
pub fn range(mut self, range: Option<TimestampRange>) -> Result<Self> {
|
||||
ensure!(self.inner.range.is_none(), RangeAlreadySet);
|
||||
self.inner.range = range;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set the general purpose predicates
|
||||
pub fn exprs(mut self, chunk_exprs: Vec<Expr>) -> Result<Self> {
|
||||
// In order to evaluate expressions in the table, all columns
|
||||
// referenced in the expression must appear (I think, not sure
|
||||
// about NOT, etc so panic if we see one of those);
|
||||
let mut visitor = SupportVisitor {};
|
||||
let mut predicate_columns: HashSet<String> = HashSet::new();
|
||||
for expr in &chunk_exprs {
|
||||
visitor = expr.accept(visitor).context(UnsupportedPredicate)?;
|
||||
expr_to_column_names(&expr, &mut predicate_columns)
|
||||
.context(InternalVisitingExpressions)?;
|
||||
}
|
||||
|
||||
ensure!(self.inner.chunk_exprs.is_empty(), ExprsAlreadySet);
|
||||
self.inner.chunk_exprs = chunk_exprs;
|
||||
|
||||
// if there are any column references in the expression, ensure they appear in
|
||||
// any table
|
||||
if !predicate_columns.is_empty() {
|
||||
ensure!(
|
||||
self.inner.required_columns.is_none(),
|
||||
RequiredColumnsAlreadySet
|
||||
);
|
||||
self.inner.required_columns = Some(self.make_chunk_ids(predicate_columns.iter()));
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Return the created chunk predicate, consuming self
|
||||
pub fn build(self) -> ChunkPredicate {
|
||||
self.inner
|
||||
}
|
||||
|
||||
/// Converts a Set of strings into a set of ids in terms of this
|
||||
/// Chunk's dictionary.
|
||||
///
|
||||
/// If there are no matching Strings in the chunk's dictionary,
|
||||
/// those strings are ignored and a (potentially empty) set is
|
||||
/// returned.
|
||||
fn compile_string_list(&self, names: Option<&BTreeSet<String>>) -> Option<BTreeSet<u32>> {
|
||||
names.map(|names| {
|
||||
names
|
||||
.iter()
|
||||
.filter_map(|name| self.dictionary.id(name))
|
||||
.collect::<BTreeSet<_>>()
|
||||
})
|
||||
}
|
||||
|
||||
/// Translate a bunch of strings into a set of ids from the dictionary of this
|
||||
/// chunk
|
||||
pub fn make_chunk_ids<'b, I>(&self, predicate_columns: I) -> ChunkIdSet
|
||||
where
|
||||
I: Iterator<Item = &'b String>,
|
||||
{
|
||||
let mut symbols = BTreeSet::new();
|
||||
for column_name in predicate_columns {
|
||||
if let Some(column_id) = self.dictionary.id(column_name) {
|
||||
symbols.insert(column_id);
|
||||
} else {
|
||||
return ChunkIdSet::AtLeastOneMissing;
|
||||
}
|
||||
}
|
||||
|
||||
ChunkIdSet::Present(symbols)
|
||||
}
|
||||
}
|
||||
|
||||
/// Used to figure out if we know how to deal with this kind of
|
||||
/// predicate in the write buffer
|
||||
struct SupportVisitor {}
|
||||
|
||||
impl ExpressionVisitor for SupportVisitor {
|
||||
fn pre_visit(self, expr: &Expr) -> DatafusionResult<Recursion<Self>> {
|
||||
match expr {
|
||||
Expr::Literal(..) => Ok(Recursion::Continue(self)),
|
||||
Expr::Column(..) => Ok(Recursion::Continue(self)),
|
||||
Expr::BinaryExpr { op, .. } => {
|
||||
match op {
|
||||
Operator::Eq
|
||||
| Operator::Lt
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq
|
||||
| Operator::Plus
|
||||
| Operator::Minus
|
||||
| Operator::Multiply
|
||||
| Operator::Divide
|
||||
| Operator::And
|
||||
| Operator::Or => Ok(Recursion::Continue(self)),
|
||||
// Unsupported (need to think about ramifications)
|
||||
Operator::NotEq | Operator::Modulus | Operator::Like | Operator::NotLike => {
|
||||
Err(DataFusionError::NotImplemented(format!(
|
||||
"Operator {:?} not yet supported in IOx MutableBuffer",
|
||||
op
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Err(DataFusionError::NotImplemented(format!(
|
||||
"Unsupported expression in mutable_buffer database: {:?}",
|
||||
expr
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
|
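For reference, the time filter that the removed `make_timestamp_predicate_expr` described is `range.start <= time and time < range.end`. Below is a hedged sketch of that same expression using DataFusion's logical expression builders; the helper names (`col`, `lit`) and the `Expr` combinators (`lt_eq`, `lt`, `and`) are assumptions about the pinned DataFusion revision, not the removed implementation:

// Sketch only: builds the predicate the removed module described.
use arrow_deps::datafusion::logical_plan::{col, lit, Expr};
use internal_types::schema::TIME_COLUMN_NAME;

fn time_range_expr(start: i64, end: i64) -> Expr {
    // range.start <= time AND time < range.end
    lit(start)
        .lt_eq(col(TIME_COLUMN_NAME))
        .and(col(TIME_COLUMN_NAME).lt(lit(end)))
}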
@ -1,19 +1,16 @@
|
|||
use generated_types::wal as wb;
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::{cmp, collections::BTreeMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
chunk::Chunk,
|
||||
column,
|
||||
column::Column,
|
||||
dictionary::{Dictionary, Error as DictionaryError},
|
||||
pred::{ChunkIdSet, ChunkPredicate},
|
||||
dictionary::{Dictionary, Error as DictionaryError, DID},
|
||||
};
|
||||
use data_types::{
|
||||
database_rules::WriterId,
|
||||
partition_metadata::{ColumnSummary, Statistics},
|
||||
};
|
||||
use data_types::partition_metadata::{ColumnSummary, Statistics};
|
||||
use internal_types::{
|
||||
entry::{self, ClockValue},
|
||||
schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME},
|
||||
selection::Selection,
|
||||
};
|
||||
|
@ -33,12 +30,8 @@ use arrow_deps::{
|
|||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Tag value ID {} not found in dictionary of chunk {}", value, chunk))]
|
||||
TagValueIdNotFoundInDictionary {
|
||||
value: u32,
|
||||
chunk: u64,
|
||||
source: DictionaryError,
|
||||
},
|
||||
#[snafu(display("Tag value ID {} not found in dictionary of chunk", value))]
|
||||
TagValueIdNotFoundInDictionary { value: DID, source: DictionaryError },
|
||||
|
||||
#[snafu(display("Column error on column {}: {}", column, source))]
|
||||
ColumnError {
|
||||
|
@ -53,7 +46,7 @@ pub enum Error {
|
|||
actual_column_type
|
||||
))]
|
||||
InternalColumnTypeMismatch {
|
||||
column_id: u32,
|
||||
column_id: DID,
|
||||
expected_column_type: String,
|
||||
actual_column_type: String,
|
||||
},
|
||||
|
@ -61,21 +54,12 @@ pub enum Error {
|
|||
#[snafu(display("Internal error: unexpected aggregate request for None aggregate",))]
|
||||
InternalUnexpectedNoneAggregate {},
|
||||
|
||||
#[snafu(display(
|
||||
"Column name '{}' not found in dictionary of chunk {}",
|
||||
column_name,
|
||||
chunk
|
||||
))]
|
||||
ColumnNameNotFoundInDictionary { column_name: String, chunk: u64 },
|
||||
#[snafu(display("Column name '{}' not found in dictionary of chunk", column_name,))]
|
||||
ColumnNameNotFoundInDictionary { column_name: String },
|
||||
|
||||
#[snafu(display(
|
||||
"Internal: Column id '{}' not found in dictionary of chunk {}",
|
||||
column_id,
|
||||
chunk
|
||||
))]
|
||||
#[snafu(display("Internal: Column id '{}' not found in dictionary", column_id,))]
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: u32,
|
||||
chunk: u64,
|
||||
column_id: DID,
|
||||
source: DictionaryError,
|
||||
},
|
||||
|
||||
|
@ -92,22 +76,22 @@ pub enum Error {
|
|||
column_name,
|
||||
column_id
|
||||
))]
|
||||
InternalNoColumnInIndex { column_name: String, column_id: u32 },
|
||||
InternalNoColumnInIndex { column_name: String, column_id: DID },
|
||||
|
||||
#[snafu(display("Error creating column from wal for column {}: {}", column, source))]
|
||||
CreatingFromWal {
|
||||
column: u32,
|
||||
column: DID,
|
||||
source: crate::column::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Error evaluating column predicate for column {}: {}", column, source))]
|
||||
ColumnPredicateEvaluation {
|
||||
column: u32,
|
||||
column: DID,
|
||||
source: crate::column::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Row insert to table {} missing column name", table))]
|
||||
ColumnNameNotInRow { table: u32 },
|
||||
ColumnNameNotInRow { table: DID },
|
||||
|
||||
#[snafu(display(
|
||||
"Group column '{}' not found in tag columns: {}",
|
||||
|
@ -123,68 +107,27 @@ pub enum Error {
|
|||
DuplicateGroupColumn { column_name: String },
|
||||
|
||||
#[snafu(display("Column {} not found in table {}", id, table_id))]
|
||||
ColumnIdNotFound { id: u32, table_id: u32 },
|
||||
ColumnIdNotFound { id: DID, table_id: DID },
|
||||
}
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Table {
|
||||
/// Name of the table as a u32 in the chunk dictionary
|
||||
pub id: u32,
|
||||
/// Name of the table as a DID in the chunk dictionary
|
||||
pub id: DID,
|
||||
|
||||
/// Map of column id from the chunk dictionary to the column
|
||||
pub columns: BTreeMap<u32, Column>,
|
||||
pub columns: BTreeMap<DID, Column>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
pub fn new(id: u32) -> Self {
|
||||
pub fn new(id: DID) -> Self {
|
||||
Self {
|
||||
id,
|
||||
columns: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn append_row(
|
||||
&mut self,
|
||||
dictionary: &mut Dictionary,
|
||||
values: &flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<wb::Value<'_>>>,
|
||||
) -> Result<()> {
|
||||
let row_count = self.row_count();
|
||||
|
||||
// insert new columns and validate existing ones
|
||||
for value in values {
|
||||
let column_name = value
|
||||
.column()
|
||||
.context(ColumnNameNotInRow { table: self.id })?;
|
||||
let column_id = dictionary.lookup_value_or_insert(column_name);
|
||||
|
||||
let column = match self.columns.get_mut(&column_id) {
|
||||
Some(col) => col,
|
||||
None => {
|
||||
// Add the column and make all values for existing rows None
|
||||
self.columns.insert(
|
||||
column_id,
|
||||
Column::with_value(dictionary, row_count, value)
|
||||
.context(CreatingFromWal { column: column_id })?,
|
||||
);
|
||||
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
column.push(dictionary, &value).context(ColumnError {
|
||||
column: column_name,
|
||||
})?;
|
||||
}
|
||||
|
||||
// make sure all the columns are of the same length
|
||||
for col in self.columns.values_mut() {
|
||||
col.push_none_if_len_equal(row_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn row_count(&self) -> usize {
|
||||
self.columns
|
||||
.values()
|
||||
|
@ -201,55 +144,124 @@ impl Table {
|
|||
}
|
||||
|
||||
/// Returns a reference to the specified column
|
||||
pub(crate) fn column(&self, column_id: u32) -> Result<&Column> {
|
||||
pub(crate) fn column(&self, column_id: DID) -> Result<&Column> {
|
||||
self.columns.get(&column_id).context(ColumnIdNotFound {
|
||||
id: column_id,
|
||||
table_id: self.id,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a reference to the specified column as a slice of
|
||||
/// i64s. Errors if the type is not i64
|
||||
pub fn column_i64(&self, column_id: u32) -> Result<&[Option<i64>]> {
|
||||
let column = self.column(column_id)?;
|
||||
match column {
|
||||
Column::I64(vals, _) => Ok(vals),
|
||||
_ => InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: "i64",
|
||||
actual_column_type: column.type_description(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append_rows(
|
||||
/// Validates the schema of the passed in columns, then adds their values to
|
||||
/// the associated columns in the table and updates summary statistics.
|
||||
pub fn write_columns(
|
||||
&mut self,
|
||||
dictionary: &mut Dictionary,
|
||||
rows: &flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<wb::Row<'_>>>,
|
||||
_clock_value: ClockValue,
|
||||
_writer_id: WriterId,
|
||||
columns: Vec<entry::Column<'_>>,
|
||||
) -> Result<()> {
|
||||
for row in rows {
|
||||
if let Some(values) = row.values() {
|
||||
self.append_row(dictionary, &values)?;
|
||||
// get the column ids and validate schema for those that already exist
|
||||
let columns_with_inserts = columns
|
||||
.into_iter()
|
||||
.map(|insert_column| {
|
||||
let column_id = dictionary.lookup_value_or_insert(insert_column.name());
|
||||
let values = insert_column.values();
|
||||
|
||||
if let Some(c) = self.columns.get(&column_id) {
|
||||
match (&values, c) {
|
||||
(entry::TypedValuesIterator::Bool(_), Column::Bool(_, _)) => (),
|
||||
(entry::TypedValuesIterator::U64(_), Column::U64(_, _)) => (),
|
||||
(entry::TypedValuesIterator::F64(_), Column::F64(_, _)) => (),
|
||||
(entry::TypedValuesIterator::I64(_), Column::I64(_, _)) => (),
|
||||
(entry::TypedValuesIterator::String(_), Column::String(_, _)) => {
|
||||
if !insert_column.is_field() {
|
||||
InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: c.type_description(),
|
||||
actual_column_type: values.type_description(),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
}
|
||||
(entry::TypedValuesIterator::String(_), Column::Tag(_, _)) => {
|
||||
if !insert_column.is_tag() {
|
||||
InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: c.type_description(),
|
||||
actual_column_type: values.type_description(),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
}
|
||||
_ => InternalColumnTypeMismatch {
|
||||
column_id,
|
||||
expected_column_type: c.type_description(),
|
||||
actual_column_type: values.type_description(),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok((column_id, insert_column.logical_type(), values))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let row_count_before_insert = self.row_count();
|
||||
|
||||
for (column_id, logical_type, values) in columns_with_inserts.into_iter() {
|
||||
match self.columns.get_mut(&column_id) {
|
||||
Some(c) => c
|
||||
.push_typed_values(dictionary, logical_type, values)
|
||||
.with_context(|| {
|
||||
let column = dictionary
|
||||
.lookup_id(column_id)
|
||||
.expect("column name must be present in dictionary");
|
||||
ColumnError { column }
|
||||
})?,
|
||||
None => {
|
||||
self.columns.insert(
|
||||
column_id,
|
||||
Column::new_from_typed_values(
|
||||
dictionary,
|
||||
row_count_before_insert,
|
||||
logical_type,
|
||||
values,
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ensure all columns have the same number of rows as the one with the most.
|
||||
// This adds nulls to the columns that weren't included in this write
|
||||
let max_row_count = self
|
||||
.columns
|
||||
.values()
|
||||
.fold(row_count_before_insert, |max, col| cmp::max(max, col.len()));
|
||||
|
||||
for c in self.columns.values_mut() {
|
||||
c.push_nulls_to_len(max_row_count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
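A brief usage sketch of `write_columns` as defined above, mirroring the `write_lines_to_table` helper in the tests at the bottom of this file. It assumes an existing `table: Table` and `dictionary: Dictionary`, and uses the test-only `lp_to_entry` helper from internal_types:

// Sketch: turn a line of line protocol into an entry and write its columns.
let entry = lp_to_entry("cpu,host=a usage=0.5 10");
for batch in entry
    .partition_writes()
    .unwrap()
    .first()
    .unwrap()
    .table_batches()
{
    table
        .write_columns(&mut dictionary, ClockValue::new(0), 0, batch.columns())
        .unwrap();
}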
/// Returns the column selection for all the columns in this table, ordered
|
||||
/// by table name
|
||||
fn all_columns_selection<'a>(&self, chunk: &'a Chunk) -> Result<TableColSelection<'a>> {
|
||||
fn all_columns_selection<'a>(
|
||||
&self,
|
||||
dictionary: &'a Dictionary,
|
||||
) -> Result<TableColSelection<'a>> {
|
||||
let cols = self
|
||||
.columns
|
||||
.iter()
|
||||
.map(|(column_id, _)| {
|
||||
let column_name = chunk.dictionary.lookup_id(*column_id).context(
|
||||
ColumnIdNotFoundInDictionary {
|
||||
column_id: *column_id,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
let column_name =
|
||||
dictionary
|
||||
.lookup_id(*column_id)
|
||||
.context(ColumnIdNotFoundInDictionary {
|
||||
column_id: *column_id,
|
||||
})?;
|
||||
Ok(ColSelection {
|
||||
column_name,
|
||||
column_id: *column_id,
|
||||
|
@ -266,45 +278,45 @@ impl Table {
|
|||
/// Returns a column selection for just the specified columns
|
||||
fn specific_columns_selection<'a>(
|
||||
&self,
|
||||
chunk: &'a Chunk,
|
||||
dictionary: &'a Dictionary,
|
||||
columns: &'a [&'a str],
|
||||
) -> Result<TableColSelection<'a>> {
|
||||
let cols =
|
||||
columns
|
||||
.iter()
|
||||
.map(|&column_name| {
|
||||
let column_id = chunk.dictionary.id(column_name).context(
|
||||
ColumnNameNotFoundInDictionary {
|
||||
column_name,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
let cols = columns
|
||||
.iter()
|
||||
.map(|&column_name| {
|
||||
let column_id = dictionary
|
||||
.id(column_name)
|
||||
.context(ColumnNameNotFoundInDictionary { column_name })?;
|
||||
|
||||
Ok(ColSelection {
|
||||
column_name,
|
||||
column_id,
|
||||
})
|
||||
Ok(ColSelection {
|
||||
column_name,
|
||||
column_id,
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(TableColSelection { cols })
|
||||
}
|
||||
|
||||
/// Converts this table to an arrow record batch.
|
||||
pub fn to_arrow(&self, chunk: &Chunk, selection: Selection<'_>) -> Result<RecordBatch> {
|
||||
pub fn to_arrow(
|
||||
&self,
|
||||
dictionary: &Dictionary,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<RecordBatch> {
|
||||
// translate chunk selection into name/indexes:
|
||||
let selection = match selection {
|
||||
Selection::All => self.all_columns_selection(chunk),
|
||||
Selection::Some(cols) => self.specific_columns_selection(chunk, cols),
|
||||
Selection::All => self.all_columns_selection(dictionary),
|
||||
Selection::Some(cols) => self.specific_columns_selection(dictionary, cols),
|
||||
}?;
|
||||
self.to_arrow_impl(chunk, &selection)
|
||||
self.to_arrow_impl(dictionary, &selection)
|
||||
}
|
||||
|
||||
pub fn schema(&self, chunk: &Chunk, selection: Selection<'_>) -> Result<Schema> {
|
||||
pub fn schema(&self, dictionary: &Dictionary, selection: Selection<'_>) -> Result<Schema> {
|
||||
// translate chunk selection into name/indexes:
|
||||
let selection = match selection {
|
||||
Selection::All => self.all_columns_selection(chunk),
|
||||
Selection::Some(cols) => self.specific_columns_selection(chunk, cols),
|
||||
Selection::All => self.all_columns_selection(dictionary),
|
||||
Selection::Some(cols) => self.specific_columns_selection(dictionary, cols),
|
||||
}?;
|
||||
self.schema_impl(&selection)
|
||||
}
|
||||
|
@ -341,7 +353,7 @@ impl Table {
|
|||
/// requested columns with index are tuples of column_name, column_index
|
||||
fn to_arrow_impl(
|
||||
&self,
|
||||
chunk: &Chunk,
|
||||
dictionary: &Dictionary,
|
||||
selection: &TableColSelection<'_>,
|
||||
) -> Result<RecordBatch> {
|
||||
let mut columns = Vec::with_capacity(selection.cols.len());
|
||||
|
@ -370,12 +382,9 @@ impl Table {
|
|||
match v {
|
||||
None => builder.append_null(),
|
||||
Some(value_id) => {
|
||||
let tag_value = chunk.dictionary.lookup_id(*value_id).context(
|
||||
TagValueIdNotFoundInDictionary {
|
||||
value: *value_id,
|
||||
chunk: chunk.id,
|
||||
},
|
||||
)?;
|
||||
let tag_value = dictionary
|
||||
.lookup_id(*value_id)
|
||||
.context(TagValueIdNotFoundInDictionary { value: *value_id })?;
|
||||
builder.append_value(tag_value)
|
||||
}
|
||||
}
|
||||
|
@ -430,124 +439,11 @@ impl Table {
|
|||
RecordBatch::try_new(schema, columns).context(ArrowError {})
|
||||
}
|
||||
|
||||
/// returns true if any row in this table could possibly match the
|
||||
/// predicate. true does not mean any rows will *actually* match,
|
||||
/// just that the entire table can not be ruled out.
|
||||
///
|
||||
/// false means that no rows in this table could possibly match
|
||||
pub fn could_match_predicate(&self, chunk_predicate: &ChunkPredicate) -> Result<bool> {
|
||||
Ok(
|
||||
self.matches_column_name_predicate(chunk_predicate.field_name_predicate.as_ref())
|
||||
&& self.matches_table_name_predicate(chunk_predicate.table_name_predicate.as_ref())
|
||||
&& self.matches_timestamp_predicate(chunk_predicate)?
|
||||
&& self.has_columns(chunk_predicate.required_columns.as_ref()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns true if the table contains any of the field columns
|
||||
/// requested or there are no specific fields requested.
|
||||
fn matches_column_name_predicate(&self, column_selection: Option<&BTreeSet<u32>>) -> bool {
|
||||
match column_selection {
|
||||
Some(column_selection) => {
|
||||
for column_id in column_selection {
|
||||
if let Some(column) = self.columns.get(column_id) {
|
||||
if !column.is_tag() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// selection only had tag columns
|
||||
false
|
||||
}
|
||||
None => true, // no specific selection
|
||||
}
|
||||
}
|
||||
|
||||
fn matches_table_name_predicate(&self, table_name_predicate: Option<&BTreeSet<u32>>) -> bool {
|
||||
match table_name_predicate {
|
||||
Some(table_name_predicate) => table_name_predicate.contains(&self.id),
|
||||
None => true, // no table predicate
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true if there are any timestamps in this table that
|
||||
/// fall within the timestamp range
|
||||
fn matches_timestamp_predicate(&self, chunk_predicate: &ChunkPredicate) -> Result<bool> {
|
||||
match &chunk_predicate.range {
|
||||
None => Ok(true),
|
||||
Some(range) => {
|
||||
let time_column_id = chunk_predicate.time_column_id;
|
||||
let time_column = self.column(time_column_id)?;
|
||||
time_column.has_i64_range(range.start, range.end).context(
|
||||
ColumnPredicateEvaluation {
|
||||
column: time_column_id,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// returns true if no columns are specified, or the table has all
|
||||
/// columns specified
|
||||
fn has_columns(&self, columns: Option<&ChunkIdSet>) -> bool {
|
||||
if let Some(columns) = columns {
|
||||
match columns {
|
||||
ChunkIdSet::AtLeastOneMissing => return false,
|
||||
ChunkIdSet::Present(symbols) => {
|
||||
for symbol in symbols {
|
||||
if !self.columns.contains_key(symbol) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// returns true if there are any rows in column that are non-null
|
||||
/// and within the timestamp range specified by pred
|
||||
pub(crate) fn column_matches_predicate(
|
||||
&self,
|
||||
column: &Column,
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<bool> {
|
||||
match column {
|
||||
Column::F64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::I64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::U64(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::String(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::Bool(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
Column::Tag(v, _) => self.column_value_matches_predicate(v, chunk_predicate),
|
||||
}
|
||||
}
|
||||
|
||||
fn column_value_matches_predicate<T>(
|
||||
&self,
|
||||
column_value: &[Option<T>],
|
||||
chunk_predicate: &ChunkPredicate,
|
||||
) -> Result<bool> {
|
||||
match chunk_predicate.range {
|
||||
None => Ok(true),
|
||||
Some(range) => {
|
||||
let time_column_id = chunk_predicate.time_column_id;
|
||||
let time_column = self.column(time_column_id)?;
|
||||
time_column
|
||||
.has_non_null_i64_range(column_value, range.start, range.end)
|
||||
.context(ColumnPredicateEvaluation {
|
||||
column: time_column_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stats(&self, chunk: &Chunk) -> Vec<ColumnSummary> {
|
||||
pub fn stats(&self, dictionary: &Dictionary) -> Vec<ColumnSummary> {
|
||||
self.columns
|
||||
.iter()
|
||||
.map(|(column_id, c)| {
|
||||
let column_name = chunk
|
||||
.dictionary
|
||||
let column_name = dictionary
|
||||
.lookup_id(*column_id)
|
||||
.expect("column name in dictionary");
|
||||
|
||||
|
@ -572,7 +468,7 @@ impl Table {
|
|||
|
||||
struct ColSelection<'a> {
|
||||
column_name: &'a str,
|
||||
column_id: u32,
|
||||
column_id: DID,
|
||||
}
|
||||
|
||||
/// Represents a set of column_name, column_index pairs
|
||||
|
@ -591,61 +487,13 @@ impl<'a> TableColSelection<'a> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use influxdb_line_protocol::{parse_lines, ParsedLine};
|
||||
use internal_types::data::split_lines_into_write_entry_partitions;
|
||||
use internal_types::entry::test_helpers::lp_to_entry;
|
||||
|
||||
use super::*;
|
||||
use tracker::MemRegistry;
|
||||
|
||||
#[test]
|
||||
fn test_has_columns() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4 100",
|
||||
"h2o,state=MA,city=Boston temp=72.4 250",
|
||||
];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
|
||||
let state_symbol = dictionary.id("state").unwrap();
|
||||
let new_symbol = dictionary.lookup_value_or_insert("not_a_columns");
|
||||
|
||||
assert!(table.has_columns(None));
|
||||
|
||||
let pred = ChunkIdSet::AtLeastOneMissing;
|
||||
assert!(!table.has_columns(Some(&pred)));
|
||||
|
||||
let set = BTreeSet::<u32>::new();
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(table.has_columns(Some(&pred)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(state_symbol);
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(table.has_columns(Some(&pred)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(new_symbol);
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(!table.has_columns(Some(&pred)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(state_symbol);
|
||||
set.insert(new_symbol);
|
||||
let pred = ChunkIdSet::Present(set);
|
||||
assert!(!table.has_columns(Some(&pred)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn table_size() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec![
|
||||
|
@ -653,111 +501,31 @@ mod tests {
|
|||
"h2o,state=MA,city=Boston temp=72.4 250",
|
||||
];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines.clone());
|
||||
assert_eq!(128, table.size());
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines.clone());
|
||||
assert_eq!(112, table.size());
|
||||
|
||||
// doesn't double because of the stats overhead
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines.clone());
|
||||
assert_eq!(224, table.size());
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines.clone());
|
||||
assert_eq!(192, table.size());
|
||||
|
||||
// now make sure it increased by the same amount minus stats overhead
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
assert_eq!(320, table.size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_matches_table_name_predicate() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("h2o"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4 100",
|
||||
"h2o,state=MA,city=Boston temp=72.4 250",
|
||||
];
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
|
||||
let h2o_symbol = dictionary.id("h2o").unwrap();
|
||||
|
||||
assert!(table.matches_table_name_predicate(None));
|
||||
|
||||
let set = BTreeSet::new();
|
||||
assert!(!table.matches_table_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(h2o_symbol);
|
||||
assert!(table.matches_table_name_predicate(Some(&set)));
|
||||
|
||||
// Some symbol that is not the same as h2o_symbol
|
||||
assert_ne!(37377, h2o_symbol);
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(37377);
|
||||
assert!(!table.matches_table_name_predicate(Some(&set)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_matches_column_name_predicate() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("h2o"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4,awesomeness=1000 100",
|
||||
"h2o,state=MA,city=Boston temp=72.4,awesomeness=2000 250",
|
||||
];
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
|
||||
let state_symbol = dictionary.id("state").unwrap();
|
||||
let temp_symbol = dictionary.id("temp").unwrap();
|
||||
let awesomeness_symbol = dictionary.id("awesomeness").unwrap();
|
||||
|
||||
assert!(table.matches_column_name_predicate(None));
|
||||
|
||||
let set = BTreeSet::new();
|
||||
assert!(!table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
// tag columns should not count
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(state_symbol);
|
||||
assert!(!table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(temp_symbol);
|
||||
assert!(table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(temp_symbol);
|
||||
set.insert(awesomeness_symbol);
|
||||
assert!(table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(temp_symbol);
|
||||
set.insert(awesomeness_symbol);
|
||||
set.insert(1337); // some other symbol, but that is ok
|
||||
assert!(table.matches_column_name_predicate(Some(&set)));
|
||||
|
||||
let mut set = BTreeSet::new();
|
||||
set.insert(1337);
|
||||
assert!(!table.matches_column_name_predicate(Some(&set)));
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines);
|
||||
assert_eq!(272, table.size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_arrow_schema_all() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston float_field=70.4,int_field=8i,uint_field=42u,bool_field=t,string_field=\"foo\" 100",
|
||||
];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines);
|
||||
|
||||
let selection = Selection::All;
|
||||
let actual_schema = table.schema(&chunk, selection).unwrap();
|
||||
let actual_schema = table.schema(&dictionary, selection).unwrap();
|
||||
let expected_schema = SchemaBuilder::new()
|
||||
.field("bool_field", ArrowDataType::Boolean)
|
||||
.tag("city")
|
||||
|
@ -779,17 +547,15 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_to_arrow_schema_subset() {
|
||||
let registry = Arc::new(MemRegistry::new());
|
||||
let mut chunk = Chunk::new(42, registry.as_ref());
|
||||
let dictionary = &mut chunk.dictionary;
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("table_name"));
|
||||
|
||||
let lp_lines = vec!["h2o,state=MA,city=Boston float_field=70.4 100"];
|
||||
|
||||
write_lines_to_table(&mut table, dictionary, lp_lines);
|
||||
write_lines_to_table(&mut table, &mut dictionary, lp_lines);
|
||||
|
||||
let selection = Selection::Some(&["float_field"]);
|
||||
let actual_schema = table.schema(&chunk, selection).unwrap();
|
||||
let actual_schema = table.schema(&dictionary, selection).unwrap();
|
||||
let expected_schema = SchemaBuilder::new()
|
||||
.field("float_field", ArrowDataType::Float64)
|
||||
.build()
|
||||
|
@ -802,29 +568,172 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_columns_validates_schema() {
|
||||
let mut dictionary = Dictionary::new();
|
||||
let mut table = Table::new(dictionary.lookup_value_or_insert("foo"));
|
||||
|
||||
let lp = "foo,t1=asdf iv=1i,uv=1u,fv=1.0,bv=true,sv=\"hi\" 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let lp = "foo t1=\"string\" 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(
|
||||
&response,
|
||||
Error::InternalColumnTypeMismatch {
|
||||
expected_column_type,
|
||||
actual_column_type,
|
||||
..
|
||||
} if expected_column_type == "tag" && actual_column_type == "String"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo iv=1u 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "i64" && actual_column_type == "u64"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo fv=1i 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "f64" && actual_column_type == "i64"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo bv=1 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "bool" && actual_column_type == "f64"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
|
||||
let lp = "foo sv=true 1";
|
||||
let entry = lp_to_entry(&lp);
|
||||
let response = table
|
||||
.write_columns(
|
||||
&mut dictionary,
|
||||
ClockValue::new(0),
|
||||
0,
|
||||
entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
.first()
|
||||
.unwrap()
|
||||
.columns(),
|
||||
)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(&response, Error::InternalColumnTypeMismatch {expected_column_type, actual_column_type, ..} if expected_column_type == "String" && actual_column_type == "bool"),
|
||||
format!("didn't match returned error: {:?}", response)
|
||||
);
|
||||
}
|
||||
|
||||
/// Insert the line protocol lines in `lp_lines` into this table
|
||||
fn write_lines_to_table(table: &mut Table, dictionary: &mut Dictionary, lp_lines: Vec<&str>) {
|
||||
let lp_data = lp_lines.join("\n");
|
||||
let entry = lp_to_entry(&lp_data);
|
||||
|
||||
let lines: Vec<_> = parse_lines(&lp_data).map(|l| l.unwrap()).collect();
|
||||
|
||||
let data = split_lines_into_write_entry_partitions(chunk_key_func, &lines);
|
||||
|
||||
let batch = flatbuffers::root::<wb::WriteBufferBatch<'_>>(&data).unwrap();
|
||||
let entries = batch.entries().expect("at least one entry");
|
||||
|
||||
for entry in entries {
|
||||
let table_batches = entry.table_batches().expect("there were table batches");
|
||||
for batch in table_batches {
|
||||
let rows = batch.rows().expect("Had rows in the batch");
|
||||
table
|
||||
.append_rows(dictionary, &rows)
|
||||
.expect("Appended the row");
|
||||
}
|
||||
for batch in entry
|
||||
.partition_writes()
|
||||
.unwrap()
|
||||
.first()
|
||||
.unwrap()
|
||||
.table_batches()
|
||||
{
|
||||
table
|
||||
.write_columns(dictionary, ClockValue::new(0), 0, batch.columns())
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn chunk_key_func(_: &ParsedLine<'_>) -> String {
|
||||
String::from("the_chunk_key")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ bytes = "1.0"
|
|||
chrono = "0.4"
|
||||
# Google Cloud Storage integration
|
||||
cloud-storage = "0.9.0"
|
||||
futures = "0.3.5"
|
||||
futures = "0.3"
|
||||
itertools = "0.9.0"
|
||||
percent-encoding = "2.1"
|
||||
# rusoto crates are for Amazon S3 integration
|
||||
|
|
|
@ -9,6 +9,7 @@ arrow_deps = { path = "../arrow_deps" }
|
|||
bytes = "1.0"
|
||||
data_types = { path = "../data_types" }
|
||||
futures = "0.3.7"
|
||||
internal_types = {path = "../internal_types"}
|
||||
object_store = {path = "../object_store"}
|
||||
parking_lot = "0.11.1"
|
||||
snafu = "0.6"
|
||||
|
|
|
@ -1,22 +1,44 @@
|
|||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use crate::table::Table;
|
||||
use data_types::partition_metadata::TableSummary;
|
||||
use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use object_store::path::Path;
|
||||
use tracker::{MemRegistry, MemTracker};
|
||||
|
||||
use std::mem;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Error writing table '{}': {}", table_name, source))]
|
||||
TableWrite {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Table Error in '{}': {}", table_name, source))]
|
||||
NamedTableError {
|
||||
table_name: String,
|
||||
source: crate::table::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Table '{}' not found in chunk {}", table_name, chunk_id))]
|
||||
NamedTableNotFoundInChunk { table_name: String, chunk_id: u64 },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Chunk {
|
||||
/// Partition this chunk belongs to
|
||||
pub partition_key: String,
|
||||
partition_key: String,
|
||||
|
||||
/// The id for this chunk
|
||||
pub id: u32,
|
||||
id: u32,
|
||||
|
||||
/// Tables of this chunk
|
||||
pub tables: Vec<Table>,
|
||||
tables: Vec<Table>,
|
||||
|
||||
/// Track memory used by this chunk
|
||||
memory_tracker: MemTracker,
|
||||
|
@ -34,9 +56,36 @@ impl Chunk {
|
|||
chunk
|
||||
}
|
||||
|
||||
/// Return the chunk id
|
||||
pub fn id(&self) -> u32 {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Return the chunk's partition key
|
||||
pub fn partition_key(&self) -> &str {
|
||||
self.partition_key.as_ref()
|
||||
}
|
||||
|
||||
/// Return the object store paths of all tables in this chunk
|
||||
pub fn all_paths(&self) -> Vec<Path> {
|
||||
self.tables.iter().map(|t| t.path()).collect()
|
||||
}
|
||||
|
||||
/// Returns a vec of the summary statistics of the tables in this chunk
|
||||
pub fn table_summaries(&self) -> Vec<TableSummary> {
|
||||
self.tables.iter().map(|t| t.table_summary()).collect()
|
||||
}
|
||||
|
||||
/// Add a table and its summary to this chunk
|
||||
pub fn add_table(&mut self, table_summary: TableSummary, file_location: Path) {
|
||||
self.tables.push(Table::new(table_summary, file_location));
|
||||
pub fn add_table(
|
||||
&mut self,
|
||||
table_summary: TableSummary,
|
||||
file_location: Path,
|
||||
schema: Schema,
|
||||
range: Option<TimestampRange>,
|
||||
) {
|
||||
self.tables
|
||||
.push(Table::new(table_summary, file_location, schema, range));
|
||||
}
|
||||
|
||||
/// Return true if this chunk includes the given table
|
||||
|
@ -62,4 +111,33 @@ impl Chunk {
|
|||
|
||||
size + self.partition_key.len() + mem::size_of::<u32>() + mem::size_of::<Self>()
|
||||
}
|
||||
|
||||
/// Return Schema for the specified table / columns
|
||||
pub fn table_schema(&self, table_name: &str, selection: Selection<'_>) -> Result<Schema> {
|
||||
let table = self
|
||||
.tables
|
||||
.iter()
|
||||
.find(|t| t.has_table(table_name))
|
||||
.context(NamedTableNotFoundInChunk {
|
||||
table_name,
|
||||
chunk_id: self.id(),
|
||||
})?;
|
||||
|
||||
table
|
||||
.schema(selection)
|
||||
.context(NamedTableError { table_name })
|
||||
}
|
||||
|
||||
pub fn table_names(
|
||||
&self,
|
||||
timestamp_range: Option<TimestampRange>,
|
||||
) -> impl Iterator<Item = String> + '_ {
|
||||
self.tables.iter().flat_map(move |t| {
|
||||
if t.matches_predicate(&timestamp_range) {
|
||||
Some(t.name())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
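The `table_names` method added above filters on each table's timestamp range. A small usage sketch, assuming an existing `Chunk` and a `TimestampRange::new(start, end)` constructor in data_types::timestamp:

// Sketch: list tables that could hold data in a time window,
// or every table when no range is given.
use data_types::timestamp::TimestampRange;

fn tables_in_window(chunk: &Chunk, start: i64, end: i64) -> Vec<String> {
    chunk
        .table_names(Some(TimestampRange::new(start, end)))
        .collect()
}

fn all_tables(chunk: &Chunk) -> Vec<String> {
    chunk.table_names(None).collect()
}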
@ -1,28 +1,57 @@
|
|||
use data_types::partition_metadata::TableSummary;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::mem;
|
||||
|
||||
use data_types::{partition_metadata::TableSummary, timestamp::TimestampRange};
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use object_store::path::Path;
|
||||
|
||||
use std::mem;
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to select columns: {}", source))]
|
||||
SelectColumns {
|
||||
source: internal_types::schema::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Table that belongs to a chunk persisted in a parquet file in object store
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Table {
|
||||
/// Meta data of the table
|
||||
pub table_summary: TableSummary,
|
||||
table_summary: TableSummary,
|
||||
|
||||
/// Path in the object store. Format:
|
||||
/// <writer id>/<database>/data/<partition key>/<chunk
|
||||
/// id>/<tablename>.parquet
|
||||
pub object_store_path: Path,
|
||||
object_store_path: Path,
|
||||
|
||||
/// Schema that goes with this table's parquet file
|
||||
table_schema: Schema,
|
||||
|
||||
/// Timestamp range of this table's parquet file
|
||||
timestamp_range: Option<TimestampRange>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
pub fn new(meta: TableSummary, path: Path) -> Self {
|
||||
pub fn new(
|
||||
meta: TableSummary,
|
||||
path: Path,
|
||||
schema: Schema,
|
||||
range: Option<TimestampRange>,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_summary: meta,
|
||||
object_store_path: path,
|
||||
table_schema: schema,
|
||||
timestamp_range: range,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table_summary(&self) -> TableSummary {
|
||||
self.table_summary.clone()
|
||||
}
|
||||
|
||||
pub fn has_table(&self, table_name: &str) -> bool {
|
||||
self.table_summary.has_table(table_name)
|
||||
}
|
||||
|
@ -32,10 +61,36 @@ impl Table {
|
|||
mem::size_of::<Self>()
|
||||
+ self.table_summary.size()
|
||||
+ mem::size_of_val(&self.object_store_path)
|
||||
+ mem::size_of_val(&self.table_schema)
|
||||
}
|
||||
|
||||
/// Return name of this table
|
||||
pub fn name(&self) -> String {
|
||||
self.table_summary.name.clone()
|
||||
}
|
||||
|
||||
/// Return the object store path of this table
|
||||
pub fn path(&self) -> Path {
|
||||
self.object_store_path.clone()
|
||||
}
|
||||
|
||||
/// Return the schema of this table for the specified selection of columns
|
||||
pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
|
||||
Ok(match selection {
|
||||
Selection::All => self.table_schema.clone(),
|
||||
Selection::Some(columns) => {
|
||||
let columns = self.table_schema.select(columns).context(SelectColumns)?;
|
||||
self.table_schema.project(&columns)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn matches_predicate(&self, timestamp_range: &Option<TimestampRange>) -> bool {
|
||||
match (self.timestamp_range, timestamp_range) {
|
||||
(Some(a), Some(b)) => !a.disjoint(b),
|
||||
(None, Some(_)) => false, // if this chunk doesn't have a time column it can't match
|
||||
// the predicate
|
||||
(_, None) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
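The three arms of `matches_predicate` above can be read as an interval-overlap rule. A self-contained sketch, modelling `TimestampRange` as a half-open `(start, end)` pair; the exact semantics of `disjoint` are an assumption:

// Sketch of the decision table: overlap when both ranges exist, never match
// a time predicate without a time column, match everything without a predicate.
fn matches(table_range: Option<(i64, i64)>, query: Option<(i64, i64)>) -> bool {
    match (table_range, query) {
        (Some((ts, te)), Some((qs, qe))) => qs < te && ts < qe,
        (None, Some(_)) => false,
        (_, None) => true,
    }
}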
@ -19,7 +19,7 @@ async-trait = "0.1"
|
|||
chrono = "0.4"
|
||||
croaring = "0.4.5"
|
||||
data_types = { path = "../data_types" }
|
||||
futures = "0.3.7"
|
||||
futures = "0.3"
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
parking_lot = "0.11.1"
|
||||
|
@ -29,5 +29,9 @@ tokio = { version = "1.0", features = ["macros"] }
|
|||
tokio-stream = "0.1.2"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
|
||||
# use libc on unix like platforms to set worker priority in DedicatedExecutor
|
||||
[target."cfg(unix)".dependencies.libc]
|
||||
version = "0.2"
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -8,13 +8,14 @@ pub mod fieldlist;
|
|||
mod schema_pivot;
|
||||
pub mod seriesset;
|
||||
pub mod stringset;
|
||||
mod task;
|
||||
pub use context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_deps::{
|
||||
arrow::record_batch::RecordBatch,
|
||||
datafusion::{self, logical_plan::LogicalPlan},
|
||||
datafusion::{self, logical_plan::LogicalPlan, physical_plan::ExecutionPlan},
|
||||
};
|
||||
use counters::ExecutionCounters;
|
||||
|
||||
|
@ -34,6 +35,8 @@ use crate::plan::{
|
|||
stringset::StringSetPlan,
|
||||
};
|
||||
|
||||
use self::task::{DedicatedExecutor, Error as ExecutorError};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Plan Execution Error: {}", source))]
|
||||
|
@ -84,21 +87,29 @@ pub enum Error {
|
|||
},
|
||||
|
||||
#[snafu(display("Joining execution task: {}", source))]
|
||||
JoinError { source: tokio::task::JoinError },
|
||||
JoinError { source: ExecutorError },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// Handles executing plans, and marshalling the results into rust
|
||||
/// Handles executing DataFusion plans, and marshalling the results into rust
|
||||
/// native structures.
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug)]
|
||||
pub struct Executor {
|
||||
counters: Arc<ExecutionCounters>,
|
||||
exec: DedicatedExecutor,
|
||||
}
|
||||
|
||||
impl Executor {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
/// Creates a new executor with a single dedicated thread pool with
|
||||
/// num_threads
|
||||
pub fn new(num_threads: usize) -> Self {
|
||||
let exec = DedicatedExecutor::new("IOx Executor Thread", num_threads);
|
||||
|
||||
Self {
|
||||
exec,
|
||||
counters: Arc::new(ExecutionCounters::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes this plan and returns the resulting set of strings
|
||||
|
@ -148,7 +159,7 @@ impl Executor {
|
|||
let (plan_tx, plan_rx) = mpsc::channel(1);
|
||||
rx_channels.push(plan_rx);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
self.exec.spawn(async move {
|
||||
let SeriesSetPlan {
|
||||
table_name,
|
||||
plan,
|
||||
|
@ -161,7 +172,6 @@ impl Executor {
|
|||
|
||||
let physical_plan = ctx
|
||||
.prepare_plan(&plan)
|
||||
.await
|
||||
.context(DataFusionPhysicalPlanning)?;
|
||||
|
||||
let it = ctx
|
||||
|
@ -212,13 +222,10 @@ impl Executor {
|
|||
let handles = plans
|
||||
.into_iter()
|
||||
.map(|plan| {
|
||||
let counters = Arc::clone(&self.counters);
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
let ctx = IOxExecutionContext::new(counters);
|
||||
let ctx = self.new_context();
|
||||
self.exec.spawn(async move {
|
||||
let physical_plan = ctx
|
||||
.prepare_plan(&plan)
|
||||
.await
|
||||
.context(DataFusionPhysicalPlanning)?;
|
||||
|
||||
// TODO: avoid this buffering
|
||||
|
@ -250,9 +257,18 @@ impl Executor {
|
|||
self.run_logical_plans(vec![plan]).await
|
||||
}
|
||||
|
||||
/// Executes the given physical plan using DataFusion on a separate
|
||||
/// thread pool and produces RecordBatches
|
||||
pub async fn collect(&self, physical_plan: Arc<dyn ExecutionPlan>) -> Result<Vec<RecordBatch>> {
|
||||
self.new_context()
|
||||
.collect(physical_plan)
|
||||
.await
|
||||
.context(DataFusionExecution)
|
||||
}
|
||||
|
||||
/// Create a new execution context, suitable for executing a new query
|
||||
pub fn new_context(&self) -> IOxExecutionContext {
|
||||
IOxExecutionContext::new(Arc::clone(&self.counters))
|
||||
IOxExecutionContext::new(self.exec.clone(), Arc::clone(&self.counters))
|
||||
}
|
||||
|
||||
/// plans and runs the plans in parallel and collects the results
|
||||
|
@ -262,11 +278,10 @@ impl Executor {
|
|||
.into_iter()
|
||||
.map(|plan| {
|
||||
let ctx = self.new_context();
|
||||
// TODO run these on some executor other than the main tokio pool
|
||||
tokio::task::spawn(async move {
|
||||
|
||||
self.exec.spawn(async move {
|
||||
let physical_plan = ctx
|
||||
.prepare_plan(&plan)
|
||||
.await
|
||||
.context(DataFusionPhysicalPlanning)?;
|
||||
|
||||
// TODO: avoid this buffering
|
||||
|
@ -327,7 +342,7 @@ mod tests {
|
|||
let expected_strings = to_set(&["Foo", "Bar"]);
|
||||
let plan = StringSetPlan::Known(Arc::clone(&expected_strings));
|
||||
|
||||
let executor = Executor::default();
|
||||
let executor = Executor::new(1);
|
||||
let result_strings = executor.to_string_set(plan).await.unwrap();
|
||||
assert_eq!(result_strings, expected_strings);
|
||||
}
|
||||
|
@ -339,7 +354,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, StringSetRef::new(StringSet::new()));
|
||||
|
@ -355,7 +370,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, to_set(&["foo", "bar", "baz"]));
|
||||
|
@ -374,7 +389,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch1, batch2]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, to_set(&["foo", "bar", "baz"]));
|
||||
|
@ -397,7 +412,7 @@ mod tests {
|
|||
|
||||
let plan: StringSetPlan = vec![scan1, scan2].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.unwrap();
|
||||
|
||||
assert_eq!(results, to_set(&["foo", "bar", "baz"]));
|
||||
|
@ -417,7 +432,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await;
|
||||
|
||||
let actual_error = match results {
|
||||
|
@ -443,7 +458,7 @@ mod tests {
|
|||
let scan = make_plan(schema, vec![batch]);
|
||||
let plan: StringSetPlan = vec![scan].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await;
|
||||
|
||||
let actual_error = match results {
|
||||
|
@ -481,7 +496,7 @@ mod tests {
|
|||
let pivot = make_schema_pivot(scan);
|
||||
let plan = vec![pivot].into();
|
||||
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let results = executor.to_string_set(plan).await.expect("Executed plan");
|
||||
|
||||
assert_eq!(results, to_set(&["f1", "f2"]));
|
||||
|
|
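// A minimal sketch of the new calling convention for `Executor`: the thread
// count is now explicit and `collect` runs the plan on the dedicated pool.
// Assumes this workspace's crates (`query`, `arrow_deps`) as shown in the
// diff; how the physical plan is produced is elided here.
use std::sync::Arc;

use arrow_deps::{arrow::record_batch::RecordBatch, datafusion::physical_plan::ExecutionPlan};
use query::exec::Executor;

async fn run_plan(physical_plan: Arc<dyn ExecutionPlan>) -> Vec<RecordBatch> {
    // Size the dedicated pool explicitly; `Executor::default()` is gone.
    let executor = Executor::new(1);

    // The DataFusion work is handed to the DedicatedExecutor, keeping the
    // caller's tokio runtime free for other requests.
    executor
        .collect(physical_plan)
        .await
        .expect("physical plan ran to completion")
}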
|
@ -25,7 +25,7 @@ use observability_deps::tracing::debug;
|
|||
// Reuse DataFusion error and Result types for this module
|
||||
pub use arrow_deps::datafusion::error::{DataFusionError as Error, Result};
|
||||
|
||||
use super::counters::ExecutionCounters;
|
||||
use super::{counters::ExecutionCounters, task::DedicatedExecutor};
|
||||
|
||||
// The default catalog name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_CATALOG: &str = "public";
|
||||
|
@ -77,15 +77,27 @@ impl ExtensionPlanner for IOxExtensionPlanner {
|
|||
}
|
||||
}
|
||||
|
||||
/// This is an execution context for planning in IOx.
|
||||
/// It wraps a DataFusion execution context and includes
|
||||
/// statistical counters.
|
||||
/// This is an execution context for planning in IOx. It wraps a
|
||||
/// DataFusion execution context and includes statistical counters and
|
||||
/// a dedicated thread pool.
|
||||
///
|
||||
/// Eventually we envision this as also managing resources
|
||||
/// and providing visibility into what plans are running
|
||||
/// Methods on this struct should be preferred to using the raw
|
||||
/// DataFusion functions (such as `collect`) directly.
|
||||
///
|
||||
/// Eventually we envision this also managing additional resource
|
||||
/// types such as Memory and providing visibility into what plans are
|
||||
/// running
|
||||
pub struct IOxExecutionContext {
|
||||
counters: Arc<ExecutionCounters>,
|
||||
inner: ExecutionContext,
|
||||
|
||||
/// Dedicated executor for query execution.
|
||||
///
|
||||
/// DataFusion plans are "CPU" bound and thus can consume tokio
|
||||
/// executors threads for extended periods of time. We use a
|
||||
/// dedicated tokio runtime to run them so that other requests
|
||||
/// can be handled.
|
||||
exec: DedicatedExecutor,
|
||||
}
|
||||
|
||||
impl fmt::Debug for IOxExecutionContext {
|
||||
|
@ -102,7 +114,7 @@ impl IOxExecutionContext {
|
|||
///
|
||||
/// The config is created with a default catalog and schema, but this
|
||||
/// can be overridden at a later date
|
||||
pub fn new(counters: Arc<ExecutionCounters>) -> Self {
|
||||
pub fn new(exec: DedicatedExecutor, counters: Arc<ExecutionCounters>) -> Self {
|
||||
const BATCH_SIZE: usize = 1000;
|
||||
|
||||
// TBD: Should we be reusing an execution context across all executions?
|
||||
|
@ -115,7 +127,11 @@ impl IOxExecutionContext {
|
|||
|
||||
let inner = ExecutionContext::with_config(config);
|
||||
|
||||
Self { counters, inner }
|
||||
Self {
|
||||
exec,
|
||||
counters,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
|
||||
/// returns a reference to the inner datafusion execution context
|
||||
|
@ -130,13 +146,13 @@ impl IOxExecutionContext {
|
|||
|
||||
/// Prepare a SQL statement for execution. This assumes that any
|
||||
/// tables referenced in the SQL have been registered with this context
|
||||
pub async fn prepare_sql(&mut self, sql: &str) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
pub fn prepare_sql(&mut self, sql: &str) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
let logical_plan = self.inner.sql(sql)?.to_logical_plan();
|
||||
self.prepare_plan(&logical_plan).await
|
||||
self.prepare_plan(&logical_plan)
|
||||
}
|
||||
|
||||
/// Prepare (optimize + plan) a pre-created logical plan for execution
|
||||
pub async fn prepare_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
pub fn prepare_plan(&self, plan: &LogicalPlan) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
debug!(
|
||||
"Creating plan: Initial plan\n----\n{}\n{}\n----",
|
||||
plan.display_indent_schema(),
|
||||
|
@ -154,13 +170,16 @@ impl IOxExecutionContext {
|
|||
self.inner.create_physical_plan(&plan)
|
||||
}
|
||||
|
||||
/// Executes the logical plan using DataFusion and produces RecordBatches
|
||||
/// Executes the given physical plan using DataFusion on a separate
|
||||
/// thread pool and produces RecordBatches
|
||||
pub async fn collect(&self, physical_plan: Arc<dyn ExecutionPlan>) -> Result<Vec<RecordBatch>> {
|
||||
self.counters.inc_plans_run();
|
||||
|
||||
debug!("Running plan, physical:\n{:?}", physical_plan);
|
||||
|
||||
collect(physical_plan).await
|
||||
self.exec.spawn(collect(physical_plan)).await.map_err(|e| {
|
||||
Error::Execution(format!("Error running IOxExecutionContext::collect: {}", e))
|
||||
})?
|
||||
}
|
||||
|
||||
/// Executes the physical plan and produces a RecordBatchStream to stream
|
||||
|
@ -169,14 +188,21 @@ impl IOxExecutionContext {
|
|||
&self,
|
||||
physical_plan: Arc<dyn ExecutionPlan>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
if physical_plan.output_partitioning().partition_count() <= 1 {
|
||||
physical_plan.execute(0).await
|
||||
} else {
|
||||
// merge into a single partition
|
||||
let plan = MergeExec::new(physical_plan);
|
||||
// MergeExec must produce a single partition
|
||||
assert_eq!(1, plan.output_partitioning().partition_count());
|
||||
plan.execute(0).await
|
||||
}
|
||||
self.exec
|
||||
.spawn(async move {
|
||||
if physical_plan.output_partitioning().partition_count() <= 1 {
|
||||
physical_plan.execute(0).await
|
||||
} else {
|
||||
// merge into a single partition
|
||||
let plan = MergeExec::new(physical_plan);
|
||||
// MergeExec must produce a single partition
|
||||
assert_eq!(1, plan.output_partitioning().partition_count());
|
||||
plan.execute(0).await
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
Error::Execution(format!("Error running IOxExecutionContext::execute: {}", e))
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
|
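// Sketch of how the reworked IOxExecutionContext is used: planning is now
// synchronous, only execution awaits (and runs on the dedicated pool).
// Assumes the `query` and `arrow_deps` APIs shown in this diff; registering
// the tables/catalog needed by the SQL is elided.
use arrow_deps::{arrow::record_batch::RecordBatch, datafusion::error::Result};
use query::exec::Executor;

async fn run_sql(executor: &Executor, sql: &str) -> Result<Vec<RecordBatch>> {
    let mut ctx = executor.new_context();

    // No `.await` any more: `prepare_sql`/`prepare_plan` are plain functions.
    let physical_plan = ctx.prepare_sql(sql)?;

    // Execution still awaits; internally it goes through `exec.spawn(...)`.
    ctx.collect(physical_plan).await
}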
|
@ -0,0 +1,344 @@
|
|||
//! This module contains a dedicated thread pool for running "cpu
|
||||
//! intensive" workloads such as DataFusion plans
|
||||
|
||||
use parking_lot::Mutex;
|
||||
use std::{pin::Pin, sync::Arc};
|
||||
use tokio::sync::oneshot::Receiver;
|
||||
|
||||
use futures::Future;
|
||||
|
||||
use observability_deps::tracing::warn;
|
||||
|
||||
/// The type of thing that the dedicated executor runs
|
||||
type Task = Pin<Box<dyn Future<Output = ()> + Send>>;
|
||||
|
||||
/// The type of error that is returned from tasks in this module
|
||||
pub type Error = tokio::sync::oneshot::error::RecvError;
|
||||
|
||||
/// Runs futures (and any `tasks` that are `tokio::task::spawned` by
|
||||
/// them) on a separate tokio Executor
|
||||
#[derive(Clone)]
|
||||
pub struct DedicatedExecutor {
|
||||
state: Arc<Mutex<State>>,
|
||||
}
|
||||
|
||||
/// State shared by all clones of a `DedicatedExecutor`: the channel of
|
||||
/// pending tasks and the handle of the dedicated worker thread
|
||||
struct State {
|
||||
/// Channel for requests -- the dedicated executor takes requests
|
||||
/// from here and runs them.
|
||||
requests: Option<std::sync::mpsc::Sender<Task>>,
|
||||
|
||||
/// The thread that is doing the work
|
||||
thread: Option<std::thread::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
/// The default worker priority (value passed to `libc::setpriority`)
|
||||
const WORKER_PRIORITY: i32 = 10;
|
||||
|
||||
impl std::fmt::Debug for DedicatedExecutor {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// Avoid taking the mutex in debug formatting
|
||||
write!(f, "DedicatedExecutor")
|
||||
}
|
||||
}
|
||||
|
||||
impl DedicatedExecutor {
|
||||
/// Creates a new `DedicatedExecutor` with a dedicated tokio
|
||||
/// executor that is separate from the threadpool created via
|
||||
/// `[tokio::main]` or similar.
|
||||
///
|
||||
/// The worker thread priority is set to low so that such tasks do
|
||||
/// not starve other more important tasks (such as answering health checks)
|
||||
///
|
||||
/// Follows the example from Stack Overflow and spawns a new
|
||||
/// thread to install a Tokio runtime "context"
|
||||
/// https://stackoverflow.com/questions/62536566
|
||||
///
|
||||
/// If you try to do this from an async context you see something like
|
||||
/// thread 'plan::stringset::tests::test_builder_plan' panicked at 'Cannot
|
||||
/// drop a runtime in a context where blocking is not allowed. This
|
||||
/// happens when a runtime is dropped from within an asynchronous
|
||||
/// context.', .../tokio-1.4.0/src/runtime/blocking/shutdown.rs:51:21
|
||||
pub fn new(thread_name: &str, num_threads: usize) -> Self {
|
||||
let thread_name = thread_name.to_string();
|
||||
|
||||
let (tx, rx) = std::sync::mpsc::channel();
|
||||
|
||||
let thread = std::thread::spawn(move || {
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.thread_name(&thread_name)
|
||||
.worker_threads(num_threads)
|
||||
.on_thread_start(move || set_current_thread_priority(WORKER_PRIORITY))
|
||||
.build()
|
||||
.expect("Creating tokio runtime");
|
||||
|
||||
// By entering the context, all calls to `tokio::spawn` go
|
||||
// to this executor
|
||||
let _guard = runtime.enter();
|
||||
|
||||
while let Ok(request) = rx.recv() {
|
||||
// TODO track the outstanding tasks
|
||||
tokio::task::spawn(request);
|
||||
}
|
||||
});
|
||||
|
||||
let state = State {
|
||||
requests: Some(tx),
|
||||
thread: Some(thread),
|
||||
};
|
||||
|
||||
Self {
|
||||
state: Arc::new(Mutex::new(state)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the specified Future (and any tasks it spawns) on the
|
||||
/// `DedicatedExecutor`.
|
||||
///
|
||||
/// Currently all tasks are added to the tokio executor
|
||||
/// immediately and compete for the threadpool's resources.
|
||||
pub fn spawn<T>(&self, task: T) -> Receiver<T::Output>
|
||||
where
|
||||
T: Future + Send + 'static,
|
||||
T::Output: Send + 'static,
|
||||
{
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
|
||||
let job = Box::pin(async move {
|
||||
let task_output = task.await;
|
||||
if tx.send(task_output).is_err() {
|
||||
warn!("Spawned task output ignored: receiver dropped")
|
||||
}
|
||||
});
|
||||
|
||||
let mut state = self.state.lock();
|
||||
|
||||
if let Some(requests) = &mut state.requests {
|
||||
// would fail if someone has started shutdown
|
||||
requests.send(job).ok();
|
||||
} else {
|
||||
warn!("tried to schedule task on an executor that was shutdown");
|
||||
}
|
||||
|
||||
rx
|
||||
}
|
||||
|
||||
/// signals shutdown of this executor and any Clones
|
||||
pub fn shutdown(&self) {
|
||||
// hang up the channel which will cause the dedicated thread
|
||||
// to quit
|
||||
let mut state = self.state.lock();
|
||||
state.requests = None;
|
||||
}
|
||||
|
||||
/// Stops all subsequent task executions, and waits for the worker
|
||||
/// thread to complete. Note this will shutdown all clones of this
|
||||
/// `DedicatedExecutor` as well.
|
||||
///
|
||||
/// Only the first call to `join` will actually wait for the
|
||||
/// executing thread to complete. All other calls to join will
|
||||
/// complete immediately.
|
||||
pub fn join(&self) {
|
||||
self.shutdown();
|
||||
|
||||
// take the thread out when mutex is held
|
||||
let thread = {
|
||||
let mut state = self.state.lock();
|
||||
state.thread.take()
|
||||
};
|
||||
|
||||
// wait for completion while not holding the mutex to avoid
|
||||
// deadlocks
|
||||
if let Some(thread) = thread {
|
||||
thread.join().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn set_current_thread_priority(prio: i32) {
|
||||
// on linux setpriority sets the current thread's priority
|
||||
// (as opposed to the current process).
|
||||
unsafe { libc::setpriority(0, 0, prio) };
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn set_current_thread_priority(prio: i32) {
|
||||
warn!("Setting worker thread priority not supported on this platform");
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::{Arc, Barrier};
|
||||
|
||||
#[cfg(unix)]
|
||||
fn get_current_thread_priority() -> i32 {
|
||||
// on linux getpriority returns the current thread's priority
|
||||
// (as opposed to the current process).
|
||||
unsafe { libc::getpriority(0, 0) }
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn get_current_thread_priority() -> i32 {
|
||||
WORKER_PRIORITY
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn basic() {
|
||||
let barrier = Arc::new(Barrier::new(2));
|
||||
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
let dedicated_task = exec.spawn(do_work(42, Arc::clone(&barrier)));
|
||||
|
||||
// Note the dedicated task will never complete if it runs on
|
||||
// the main tokio thread (as this test is not using the
|
||||
// 'multithreaded' version of the executor and the call to
|
||||
// barrier.wait actually blocks the tokio thread)
|
||||
barrier.wait();
|
||||
|
||||
// should be able to get the result
|
||||
assert_eq!(dedicated_task.await.unwrap(), 42);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn basic_clone() {
|
||||
let barrier = Arc::new(Barrier::new(2));
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
// Run task on clone should work fine
|
||||
let dedicated_task = exec.clone().spawn(do_work(42, Arc::clone(&barrier)));
|
||||
barrier.wait();
|
||||
assert_eq!(dedicated_task.await.unwrap(), 42);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn multi_task() {
|
||||
let barrier = Arc::new(Barrier::new(3));
|
||||
|
||||
// make an executor with two threads
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 2);
|
||||
let dedicated_task1 = exec.spawn(do_work(11, Arc::clone(&barrier)));
|
||||
let dedicated_task2 = exec.spawn(do_work(42, Arc::clone(&barrier)));
|
||||
|
||||
// block main thread until completion of other two tasks
|
||||
barrier.wait();
|
||||
|
||||
// should be able to get the result
|
||||
assert_eq!(dedicated_task1.await.unwrap(), 11);
|
||||
assert_eq!(dedicated_task2.await.unwrap(), 42);
|
||||
|
||||
exec.join();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn worker_priority() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 2);
|
||||
|
||||
let dedicated_task = exec.spawn(async move { get_current_thread_priority() });
|
||||
|
||||
assert_eq!(dedicated_task.await.unwrap(), WORKER_PRIORITY);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tokio_spawn() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 2);
|
||||
|
||||
// spawn a task that spawns to other tasks and ensure they run on the dedicated
|
||||
// executor
|
||||
let dedicated_task = exec.spawn(async move {
|
||||
// spawn separate tasks
|
||||
let t1 = tokio::task::spawn(async {
|
||||
assert_eq!(
|
||||
std::thread::current().name(),
|
||||
Some("Test DedicatedExecutor")
|
||||
);
|
||||
25usize
|
||||
});
|
||||
t1.await.unwrap()
|
||||
});
|
||||
|
||||
// Validate the inner task ran to completion (aka it did not panic)
|
||||
assert_eq!(dedicated_task.await.unwrap(), 25);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn panic_on_executor() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
let dedicated_task = exec.spawn(async move {
|
||||
if true {
|
||||
panic!("At the disco, on the dedicated task scheduler");
|
||||
} else {
|
||||
42
|
||||
}
|
||||
});
|
||||
|
||||
// should not be able to get the result
|
||||
dedicated_task.await.unwrap_err();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_shutdown_while_task_running() {
|
||||
let barrier = Arc::new(Barrier::new(2));
|
||||
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
let dedicated_task = exec.spawn(do_work(42, Arc::clone(&barrier)));
|
||||
|
||||
exec.shutdown();
|
||||
// block main thread until completion of the outstanding task
|
||||
barrier.wait();
|
||||
|
||||
// task should complete successfully
|
||||
assert_eq!(dedicated_task.await.unwrap(), 42);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_submit_task_after_shutdown() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
|
||||
// Simulate trying to submit tasks once executor has shutdown
|
||||
exec.shutdown();
|
||||
let dedicated_task = exec.spawn(async { 11 });
|
||||
|
||||
// task should complete, but return an error
|
||||
dedicated_task.await.unwrap_err();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_submit_task_after_clone_shutdown() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
|
||||
// shutdown the clone (but not the exec)
|
||||
exec.clone().join();
|
||||
|
||||
// Simulate trying to submit tasks once executor has shutdown
|
||||
let dedicated_task = exec.spawn(async { 11 });
|
||||
|
||||
// task should complete, but return an error
|
||||
dedicated_task.await.unwrap_err();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn executor_join() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
// test it doesn't hang
|
||||
exec.join()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[allow(clippy::redundant_clone)]
|
||||
async fn executor_clone_join() {
|
||||
let exec = DedicatedExecutor::new("Test DedicatedExecutor", 1);
|
||||
// test it doesn't hang
|
||||
exec.clone().join();
|
||||
exec.clone().join();
|
||||
exec.join();
|
||||
}
|
||||
|
||||
/// Wait for the barrier and then return `result`
|
||||
async fn do_work(result: usize, barrier: Arc<Barrier>) -> usize {
|
||||
barrier.wait();
|
||||
result
|
||||
}
|
||||
}
|
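// Condensed sketch of the DedicatedExecutor lifecycle exercised by the tests
// above. The module is private (`mod task`), so this is written as code that
// would live inside `query::exec`; the names match the API added in this file.
async fn dedicated_executor_example() {
    use self::task::DedicatedExecutor;

    // Named, low-priority worker threads, separate from the main runtime.
    let exec = DedicatedExecutor::new("IOx Executor Thread", 2);

    // `spawn` returns a tokio oneshot Receiver. Awaiting it yields
    // Result<T, RecvError>: Err if the task panicked or the executor was
    // shut down before the task could run.
    let task = exec.spawn(async { 21 * 2 });
    assert_eq!(task.await.unwrap(), 42);

    // `join` signals shutdown and waits for the worker thread; it affects
    // every clone of this executor.
    exec.join();
}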
|
@ -195,13 +195,13 @@ impl InfluxRPCPlanner {
|
|||
/// Returns a plan that lists the names of tables in this
|
||||
/// database that have at least one row that matches the
|
||||
/// conditions listed on `predicate`
|
||||
pub async fn table_names<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
pub fn table_names<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
let mut builder = StringSetPlanBuilder::new();
|
||||
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
for chunk in self.filtered_chunks(database, &predicate)? {
|
||||
let new_table_names = chunk
|
||||
.table_names(&predicate, builder.known_strings())
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
|
@ -227,7 +227,7 @@ impl InfluxRPCPlanner {
|
|||
/// columns (as defined in the InfluxDB Data model) names in this
|
||||
/// database that have more than zero rows which pass the
|
||||
/// conditions specified by `predicate`.
|
||||
pub async fn tag_keys<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
pub fn tag_keys<D>(&self, database: &D, predicate: Predicate) -> Result<StringSetPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
|
@ -246,9 +246,9 @@ impl InfluxRPCPlanner {
|
|||
let mut need_full_plans = BTreeMap::new();
|
||||
|
||||
let mut known_columns = BTreeSet::new();
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
for chunk in self.filtered_chunks(database, &predicate)? {
|
||||
// try and get the table names that have rows that match the predicate
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate)?;
|
||||
|
||||
for table_name in table_names {
|
||||
debug!(
|
||||
|
@ -308,7 +308,7 @@ impl InfluxRPCPlanner {
|
|||
// were already known to have data (based on the contents of known_columns)
|
||||
|
||||
for (table_name, chunks) in need_full_plans.into_iter() {
|
||||
let plan = self.tag_keys_plan(&table_name, &predicate, chunks).await?;
|
||||
let plan = self.tag_keys_plan(&table_name, &predicate, chunks)?;
|
||||
|
||||
if let Some(plan) = plan {
|
||||
builder = builder.append(plan)
|
||||
|
@ -326,7 +326,7 @@ impl InfluxRPCPlanner {
|
|||
/// Returns a plan which finds the distinct, non-null tag values
|
||||
/// in the specified `tag_name` column of this database which pass
|
||||
/// the conditions specified by `predicate`.
|
||||
pub async fn tag_values<D>(
|
||||
pub fn tag_values<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
tag_name: &str,
|
||||
|
@ -351,8 +351,8 @@ impl InfluxRPCPlanner {
|
|||
let mut need_full_plans = BTreeMap::new();
|
||||
|
||||
let mut known_values = BTreeSet::new();
|
||||
for chunk in self.filtered_chunks(database, &predicate).await? {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
for chunk in self.filtered_chunks(database, &predicate)? {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate)?;
|
||||
|
||||
for table_name in table_names {
|
||||
debug!(
|
||||
|
@ -426,9 +426,7 @@ impl InfluxRPCPlanner {
|
|||
// time in `known_columns`, and some tables in chunks that we
|
||||
// need to run a plan to find what values pass the predicate.
|
||||
for (table_name, chunks) in need_full_plans.into_iter() {
|
||||
let scan_and_filter = self
|
||||
.scan_and_filter(&table_name, &predicate, chunks)
|
||||
.await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, &predicate, chunks)?;
|
||||
|
||||
// if we have any data to scan, make a plan!
|
||||
if let Some(TableScanAndFilter {
|
||||
|
@ -471,11 +469,7 @@ impl InfluxRPCPlanner {
|
|||
/// datatypes (as defined in the data written via `write_lines`),
|
||||
/// and which have more than zero rows which pass the conditions
|
||||
/// specified by `predicate`.
|
||||
pub async fn field_columns<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: Predicate,
|
||||
) -> Result<FieldListPlan>
|
||||
pub fn field_columns<D>(&self, database: &D, predicate: Predicate) -> Result<FieldListPlan>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
|
@ -488,15 +482,12 @@ impl InfluxRPCPlanner {
|
|||
// values and stops the plan executing once it has them
|
||||
|
||||
// map table -> Vec<Arc<Chunk>>
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
|
||||
let mut field_list_plan = FieldListPlan::new();
|
||||
for (table_name, chunks) in table_chunks {
|
||||
if let Some(plan) = self
|
||||
.field_columns_plan(&table_name, &predicate, chunks)
|
||||
.await?
|
||||
{
|
||||
if let Some(plan) = self.field_columns_plan(&table_name, &predicate, chunks)? {
|
||||
field_list_plan = field_list_plan.append(plan);
|
||||
}
|
||||
}
|
||||
|
@ -523,7 +514,7 @@ impl InfluxRPCPlanner {
|
|||
/// rows for a particular series (groups where all tags are the
|
||||
/// same) occur together in the plan
|
||||
|
||||
pub async fn read_filter<D>(&self, database: &D, predicate: Predicate) -> Result<SeriesSetPlans>
|
||||
pub fn read_filter<D>(&self, database: &D, predicate: Predicate) -> Result<SeriesSetPlans>
|
||||
where
|
||||
D: Database + 'static,
|
||||
{
|
||||
|
@ -531,17 +522,15 @@ impl InfluxRPCPlanner {
|
|||
|
||||
// group tables by chunk, pruning if possible
|
||||
// key is table name, values are chunks
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
|
||||
// now, build up plans for each table
|
||||
let mut ss_plans = Vec::with_capacity(table_chunks.len());
|
||||
for (table_name, chunks) in table_chunks {
|
||||
let prefix_columns: Option<&[&str]> = None;
|
||||
|
||||
let ss_plan = self
|
||||
.read_filter_plan(table_name, prefix_columns, &predicate, chunks)
|
||||
.await?;
|
||||
let ss_plan = self.read_filter_plan(table_name, prefix_columns, &predicate, chunks)?;
|
||||
// If we have to do real work, add it to the list of plans
|
||||
if let Some(ss_plan) = ss_plan {
|
||||
ss_plans.push(ss_plan);
|
||||
|
@ -555,7 +544,7 @@ impl InfluxRPCPlanner {
|
|||
/// with rows grouped by an aggregate function. Note that we still
|
||||
/// group by all tags (so group within series) and the
|
||||
/// group_columns define the order of the result
|
||||
pub async fn read_group<D>(
|
||||
pub fn read_group<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: Predicate,
|
||||
|
@ -568,8 +557,8 @@ impl InfluxRPCPlanner {
|
|||
debug!(predicate=?predicate, agg=?agg, "planning read_group");
|
||||
|
||||
// group tables by chunk, pruning if possible
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
let num_prefix_tag_group_columns = group_columns.len();
|
||||
|
||||
// now, build up plans for each table
|
||||
|
@ -577,13 +566,9 @@ impl InfluxRPCPlanner {
|
|||
for (table_name, chunks) in table_chunks {
|
||||
let ss_plan = match agg {
|
||||
Aggregate::None => {
|
||||
self.read_filter_plan(table_name, Some(group_columns), &predicate, chunks)
|
||||
.await?
|
||||
}
|
||||
_ => {
|
||||
self.read_group_plan(table_name, &predicate, agg, group_columns, chunks)
|
||||
.await?
|
||||
self.read_filter_plan(table_name, Some(group_columns), &predicate, chunks)?
|
||||
}
|
||||
_ => self.read_group_plan(table_name, &predicate, agg, group_columns, chunks)?,
|
||||
};
|
||||
|
||||
// If we have to do real work, add it to the list of plans
|
||||
|
@ -598,7 +583,7 @@ impl InfluxRPCPlanner {
|
|||
|
||||
/// Creates a GroupedSeriesSet plan that produces an output table with rows
|
||||
/// that are grouped by window definitions
|
||||
pub async fn read_window_aggregate<D>(
|
||||
pub fn read_window_aggregate<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: Predicate,
|
||||
|
@ -612,15 +597,14 @@ impl InfluxRPCPlanner {
|
|||
debug!(predicate=?predicate, "planning read_window_aggregate");
|
||||
|
||||
// group tables by chunk, pruning if possible
|
||||
let chunks = self.filtered_chunks(database, &predicate).await?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks).await?;
|
||||
let chunks = self.filtered_chunks(database, &predicate)?;
|
||||
let table_chunks = self.group_chunks_by_table(&predicate, chunks)?;
|
||||
|
||||
// now, build up plans for each table
|
||||
let mut ss_plans = Vec::with_capacity(table_chunks.len());
|
||||
for (table_name, chunks) in table_chunks {
|
||||
let ss_plan = self
|
||||
.read_window_aggregate_plan(table_name, &predicate, agg, &every, &offset, chunks)
|
||||
.await?;
|
||||
.read_window_aggregate_plan(table_name, &predicate, agg, &every, &offset, chunks)?;
|
||||
// If we have to do real work, add it to the list of plans
|
||||
if let Some(ss_plan) = ss_plan {
|
||||
ss_plans.push(ss_plan);
|
||||
|
@ -631,7 +615,7 @@ impl InfluxRPCPlanner {
|
|||
}
|
||||
|
||||
/// Creates a map of table_name --> Chunks that have that table
|
||||
async fn group_chunks_by_table<C>(
|
||||
fn group_chunks_by_table<C>(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
chunks: Vec<Arc<C>>,
|
||||
|
@ -641,7 +625,7 @@ impl InfluxRPCPlanner {
|
|||
{
|
||||
let mut table_chunks = BTreeMap::new();
|
||||
for chunk in chunks {
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate).await?;
|
||||
let table_names = self.chunk_table_names(chunk.as_ref(), &predicate)?;
|
||||
for table_name in table_names {
|
||||
table_chunks
|
||||
.entry(table_name)
|
||||
|
@ -653,11 +637,7 @@ impl InfluxRPCPlanner {
|
|||
}
|
||||
|
||||
/// Find all the table names in the specified chunk that pass the predicate
|
||||
async fn chunk_table_names<C>(
|
||||
&self,
|
||||
chunk: &C,
|
||||
predicate: &Predicate,
|
||||
) -> Result<BTreeSet<String>>
|
||||
fn chunk_table_names<C>(&self, chunk: &C, predicate: &Predicate) -> Result<BTreeSet<String>>
|
||||
where
|
||||
C: PartitionChunk + 'static,
|
||||
{
|
||||
|
@ -705,7 +685,7 @@ impl InfluxRPCPlanner {
|
|||
/// Filter(predicate)
|
||||
/// TableScan (of chunks)
|
||||
/// ```
|
||||
async fn tag_keys_plan<C>(
|
||||
fn tag_keys_plan<C>(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
|
@ -714,7 +694,7 @@ impl InfluxRPCPlanner {
|
|||
where
|
||||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -767,7 +747,7 @@ impl InfluxRPCPlanner {
|
|||
/// Filter(predicate) [optional]
|
||||
/// Scan
|
||||
/// ```
|
||||
async fn field_columns_plan<C>(
|
||||
fn field_columns_plan<C>(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
|
@ -776,7 +756,7 @@ impl InfluxRPCPlanner {
|
|||
where
|
||||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(table_name, predicate, chunks)?;
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
schema,
|
||||
|
@ -817,7 +797,7 @@ impl InfluxRPCPlanner {
|
|||
/// Order by (tag_columns, timestamp_column)
|
||||
/// Filter(predicate)
|
||||
/// Scan
|
||||
async fn read_filter_plan<C>(
|
||||
fn read_filter_plan<C>(
|
||||
&self,
|
||||
table_name: impl Into<String>,
|
||||
prefix_columns: Option<&[impl AsRef<str>]>,
|
||||
|
@ -828,7 +808,7 @@ impl InfluxRPCPlanner {
|
|||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let table_name = table_name.into();
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -937,7 +917,7 @@ impl InfluxRPCPlanner {
|
|||
/// GroupBy(gby cols, aggs, time cols)
|
||||
/// Filter(predicate)
|
||||
/// Scan
|
||||
pub async fn read_group_plan<C>(
|
||||
pub fn read_group_plan<C>(
|
||||
&self,
|
||||
table_name: impl Into<String>,
|
||||
predicate: &Predicate,
|
||||
|
@ -949,7 +929,7 @@ impl InfluxRPCPlanner {
|
|||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let table_name = table_name.into();
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -1027,7 +1007,7 @@ impl InfluxRPCPlanner {
|
|||
/// GroupBy(gby: tag columns, window_function; agg: aggregate(field)
|
||||
/// Filter(predicate)
|
||||
/// Scan
|
||||
pub async fn read_window_aggregate_plan<C>(
|
||||
pub fn read_window_aggregate_plan<C>(
|
||||
&self,
|
||||
table_name: impl Into<String>,
|
||||
predicate: &Predicate,
|
||||
|
@ -1040,7 +1020,7 @@ impl InfluxRPCPlanner {
|
|||
C: PartitionChunk + 'static,
|
||||
{
|
||||
let table_name = table_name.into();
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks).await?;
|
||||
let scan_and_filter = self.scan_and_filter(&table_name, predicate, chunks)?;
|
||||
|
||||
let TableScanAndFilter {
|
||||
plan_builder,
|
||||
|
@ -1114,7 +1094,7 @@ impl InfluxRPCPlanner {
|
|||
/// Filter(predicate) [optional]
|
||||
/// Scan
|
||||
/// ```
|
||||
async fn scan_and_filter<C>(
|
||||
fn scan_and_filter<C>(
|
||||
&self,
|
||||
table_name: &str,
|
||||
predicate: &Predicate,
|
||||
|
@ -1190,7 +1170,7 @@ impl InfluxRPCPlanner {
|
|||
|
||||
/// Returns a list of chunks across all partitions which may
|
||||
/// contain data that pass the predicate
|
||||
async fn filtered_chunks<D>(
|
||||
fn filtered_chunks<D>(
|
||||
&self,
|
||||
database: &D,
|
||||
predicate: &Predicate,
|
||||
|
|
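// Sketch of the resulting call pattern: InfluxRPCPlanner methods now plan
// synchronously on the caller's thread, and only the Executor awaits.
// Mirrors the benchmark added later in this PR; the `query` crate paths are
// as shown in that benchmark.
use query::exec::Executor;
use query::frontend::influxrpc::InfluxRPCPlanner;
use query::predicate::PredicateBuilder;
use query::Database;

async fn list_table_names<D: Database + 'static>(db: &D, executor: &Executor) {
    let planner = InfluxRPCPlanner::new();
    let predicate = PredicateBuilder::default().build();

    // Planning: no `.await`, errors surface immediately.
    let plan = planner.table_names(db, predicate).expect("built plan");

    // Execution: async, on the dedicated thread pool.
    let names = executor.to_string_set(plan).await.expect("ran plan");
    assert!(!names.is_empty());
}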
|
@ -84,7 +84,7 @@ impl SQLQueryPlanner {
|
|||
/// Plan a SQL query against the data in `database`, and return a
|
||||
/// DataFusion physical execution plan. The plan can then be
|
||||
/// executed using `executor` in a streaming fashion.
|
||||
pub async fn query<D: CatalogProvider + 'static>(
|
||||
pub fn query<D: CatalogProvider + 'static>(
|
||||
&self,
|
||||
database: Arc<D>,
|
||||
query: &str,
|
||||
|
@ -92,6 +92,6 @@ impl SQLQueryPlanner {
|
|||
) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
let mut ctx = executor.new_context();
|
||||
ctx.inner_mut().register_catalog(DEFAULT_CATALOG, database);
|
||||
ctx.prepare_sql(query).await.context(Preparing)
|
||||
ctx.prepare_sql(query).context(Preparing)
|
||||
}
|
||||
}
|
||||
|
|
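// Sketch of the SQL path after this change: `SQLQueryPlanner::query` returns
// a physical plan without awaiting, and execution happens separately on the
// Executor. The planner's final parameter is truncated in the hunk above, so
// the `executor: &Executor` argument and the `query::frontend::sql` path are
// assumptions; constructing the planner itself is also elided.
use std::sync::Arc;

use arrow_deps::{
    arrow::record_batch::RecordBatch,
    datafusion::catalog::catalog::CatalogProvider,
};
use query::exec::Executor;
use query::frontend::sql::SQLQueryPlanner;

async fn run_sql_query<D: CatalogProvider + 'static>(
    planner: &SQLQueryPlanner,
    db: Arc<D>,
    sql: &str,
    executor: &Executor,
) -> Vec<RecordBatch> {
    // Planning: synchronous; registers the catalog and builds a physical plan.
    let physical_plan = planner.query(db, sql, executor).expect("planned query");

    // Execution: async, on the dedicated pool.
    executor.collect(physical_plan).await.expect("executed query")
}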
|
@ -10,7 +10,7 @@ use arrow_deps::datafusion::physical_plan::SendableRecordBatchStream;
|
|||
use async_trait::async_trait;
|
||||
use data_types::chunk::ChunkSummary;
|
||||
use exec::{stringset::StringSet, Executor};
|
||||
use internal_types::{data::ReplicatedWrite, schema::Schema, selection::Selection};
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
|
||||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
|
@ -39,9 +39,6 @@ pub trait Database: Debug + Send + Sync {
|
|||
type Error: std::error::Error + Send + Sync + 'static;
|
||||
type Chunk: PartitionChunk;
|
||||
|
||||
/// Stores the replicated write into the database.
|
||||
fn store_replicated_write(&self, write: &ReplicatedWrite) -> Result<(), Self::Error>;
|
||||
|
||||
/// Return the partition keys for data in this DB
|
||||
fn partition_keys(&self) -> Result<Vec<String>, Self::Error>;
|
||||
|
||||
|
|
|
@ -211,7 +211,7 @@ mod tests {
|
|||
let expected_ss = to_string_set(&["foo", "bar", "baz", "from_a_plan"]).into();
|
||||
|
||||
assert!(matches!(plan, StringSetPlan::Plan(_)));
|
||||
let executor = Executor::new();
|
||||
let executor = Executor::new(1);
|
||||
let ss = executor.to_string_set(plan).await.unwrap();
|
||||
assert_eq!(ss, expected_ss);
|
||||
}
|
||||
|
|
|
@ -18,10 +18,7 @@ use crate::{
|
|||
Database, DatabaseStore, PartitionChunk, Predicate,
|
||||
};
|
||||
|
||||
use data_types::database_rules::{PartitionTemplate, TemplatePart};
|
||||
use influxdb_line_protocol::{parse_lines, ParsedLine};
|
||||
use internal_types::{
|
||||
data::{lines_to_replicated_write, ReplicatedWrite},
|
||||
schema::{
|
||||
builder::{SchemaBuilder, SchemaMerger},
|
||||
Schema,
|
||||
|
@ -30,10 +27,8 @@ use internal_types::{
|
|||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use data_types::database_rules::Partitioner;
|
||||
use parking_lot::Mutex;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use snafu::{OptionExt, Snafu};
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
|
@ -43,12 +38,6 @@ pub struct TestDatabase {
|
|||
/// Value is map of chunk_id to chunk
|
||||
partitions: Mutex<BTreeMap<String, BTreeMap<u32, Arc<TestChunk>>>>,
|
||||
|
||||
/// Lines which have been written to this database, in order
|
||||
saved_lines: Mutex<Vec<String>>,
|
||||
|
||||
/// Replicated writes which have been written to this database, in order
|
||||
replicated_writes: Mutex<Vec<ReplicatedWrite>>,
|
||||
|
||||
/// `column_names` to return upon next request
|
||||
column_names: Arc<Mutex<Option<StringSetRef>>>,
|
||||
}
|
||||
|
@ -74,33 +63,6 @@ impl TestDatabase {
|
|||
Self::default()
|
||||
}
|
||||
|
||||
/// Get all lines written to this database
|
||||
pub fn get_lines(&self) -> Vec<String> {
|
||||
self.saved_lines.lock().clone()
|
||||
}
|
||||
|
||||
/// Get all replicated writes to this database
|
||||
pub fn get_writes(&self) -> Vec<ReplicatedWrite> {
|
||||
self.replicated_writes.lock().clone()
|
||||
}
|
||||
|
||||
/// Parse line protocol and add it as new lines to this
|
||||
/// database
|
||||
pub async fn add_lp_string(&self, lp_data: &str) {
|
||||
let parsed_lines = parse_lines(&lp_data)
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.unwrap_or_else(|_| panic!("parsing line protocol: {}", lp_data));
|
||||
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lines(self, &parsed_lines).unwrap();
|
||||
|
||||
// Writes parsed lines into this database
|
||||
let mut saved_lines = self.saved_lines.lock();
|
||||
for line in parsed_lines {
|
||||
saved_lines.push(line.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a test chunk to the database
|
||||
pub fn add_chunk(&self, partition_key: &str, chunk: Arc<TestChunk>) {
|
||||
let mut partitions = self.partitions.lock();
|
||||
|
@ -132,12 +94,6 @@ impl Database for TestDatabase {
|
|||
type Error = TestError;
|
||||
type Chunk = TestChunk;
|
||||
|
||||
/// Adds the replicated write to this database
|
||||
fn store_replicated_write(&self, write: &ReplicatedWrite) -> Result<(), Self::Error> {
|
||||
self.replicated_writes.lock().push(write.clone());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the partition keys for data in this DB
|
||||
fn partition_keys(&self) -> Result<Vec<String>, Self::Error> {
|
||||
let partitions = self.partitions.lock();
|
||||
|
@ -448,22 +404,13 @@ impl TestDatabaseStore {
|
|||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Parse line protocol and add it as new lines to the `db_name` database
|
||||
pub async fn add_lp_string(&self, db_name: &str, lp_data: &str) {
|
||||
self.db_or_create(db_name)
|
||||
.await
|
||||
.expect("db_or_create suceeeds")
|
||||
.add_lp_string(lp_data)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TestDatabaseStore {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
databases: Mutex::new(BTreeMap::new()),
|
||||
executor: Arc::new(Executor::new()),
|
||||
executor: Arc::new(Executor::new(1)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -505,91 +452,3 @@ impl DatabaseStore for TestDatabaseStore {
|
|||
Arc::clone(&self.executor)
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for writing line protocol data directly into test databases
|
||||
/// (handles creating sequence numbers and writer ids)
|
||||
#[derive(Debug, Default)]
|
||||
pub struct TestLPWriter {
|
||||
pub writer_id: u32,
|
||||
sequence_number: u64,
|
||||
}
|
||||
|
||||
impl TestLPWriter {
|
||||
// writes data in LineProtocol format into a database
|
||||
pub fn write_lines<D: Database>(
|
||||
&mut self,
|
||||
database: &D,
|
||||
lines: &[ParsedLine<'_>],
|
||||
) -> Result<()> {
|
||||
// partitions data in hourly segments
|
||||
let partition_template = PartitionTemplate {
|
||||
parts: vec![TemplatePart::TimeFormat("%Y-%m-%dT%H".to_string())],
|
||||
};
|
||||
|
||||
let write = lines_to_replicated_write(
|
||||
self.writer_id,
|
||||
self.sequence_number,
|
||||
&lines,
|
||||
&partition_template,
|
||||
);
|
||||
self.sequence_number += 1;
|
||||
database
|
||||
.store_replicated_write(&write)
|
||||
.map_err(|e| TestError::DatabaseWrite {
|
||||
source: Box::new(e),
|
||||
})
|
||||
}
|
||||
|
||||
/// Writes line protocol formatted data in lp_data to `database`
|
||||
pub fn write_lp_string<D: Database>(&mut self, database: &D, lp_data: &str) -> Result<()> {
|
||||
let lines = parse_lines(lp_data)
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(|e| Box::new(e) as _)
|
||||
.context(DatabaseWrite)?;
|
||||
|
||||
self.write_lines(database, &lines)
|
||||
}
|
||||
|
||||
/// Writes line protocol formatted data to database and partition
|
||||
pub fn write_lp_to_partition<D: Database>(
|
||||
&mut self,
|
||||
database: &D,
|
||||
lp_data: &str,
|
||||
paritition_key: impl Into<String>,
|
||||
) {
|
||||
let lines = parse_lines(lp_data).collect::<Result<Vec<_>, _>>().unwrap();
|
||||
self.write_lines_to_partition(database, paritition_key, &lines)
|
||||
}
|
||||
|
||||
/// Writes lines to the given partition
|
||||
pub fn write_lines_to_partition<D: Database>(
|
||||
&mut self,
|
||||
database: &D,
|
||||
partition_key: impl Into<String>,
|
||||
lines: &[ParsedLine<'_>],
|
||||
) {
|
||||
let partitioner = TestPartitioner {
|
||||
key: partition_key.into(),
|
||||
};
|
||||
let write =
|
||||
lines_to_replicated_write(self.writer_id, self.sequence_number, &lines, &partitioner);
|
||||
self.sequence_number += 1;
|
||||
database.store_replicated_write(&write).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// Outputs a set partition key for testing. Used for parsing line protocol into
|
||||
// ReplicatedWrite and setting an explicit partition key for all writes therein.
|
||||
struct TestPartitioner {
|
||||
key: String,
|
||||
}
|
||||
|
||||
impl Partitioner for TestPartitioner {
|
||||
fn partition_key(
|
||||
&self,
|
||||
_line: &ParsedLine<'_>,
|
||||
_default_time: &DateTime<Utc>,
|
||||
) -> data_types::database_rules::Result<String> {
|
||||
Ok(self.key.clone())
|
||||
}
|
||||
}
|
||||
|
|
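// Sketch of the test-setup pattern that replaces the removed `add_lp_string`
// helpers: line protocol is written through TestLPWriter with an explicit
// partition key. The `query::test` module path is an assumption based on the
// file shown above.
use query::test::{TestDatabase, TestLPWriter};

fn seeded_test_db() -> TestDatabase {
    let db = TestDatabase::new();
    let mut writer = TestLPWriter::default();

    // The partition key is chosen by the test rather than derived from the data.
    writer.write_lp_to_partition(&db, "cpu,region=west user=23.2 100", "1970-01-01T00");
    db
}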
|
@ -376,6 +376,29 @@ impl Chunk {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// A helper method for determining the time-range associated with the
|
||||
/// specified table.
|
||||
///
|
||||
/// A table's schema need not contain a column representing time;
|
||||
/// however, any table that represents data using the InfluxDB model does
|
||||
/// contain a column that represents the timestamp associated with each
|
||||
/// row.
|
||||
///
|
||||
/// `table_time_range` will return the min and max values for that column
|
||||
/// if the table is using the InfluxDB data-model, otherwise it will return
|
||||
/// `None`. An error will be returned if the table does not exist.
|
||||
pub fn table_time_range(&self, table_name: &str) -> Result<Option<(i64, i64)>> {
|
||||
// read lock on chunk.
|
||||
let chunk_data = self.chunk_data.read().unwrap();
|
||||
|
||||
let table = chunk_data
|
||||
.data
|
||||
.get(table_name)
|
||||
.context(TableNotFound { table_name })?;
|
||||
|
||||
Ok(table.time_range())
|
||||
}
|
||||
|
||||
/// Returns a schema object for a `read_filter` operation using the provided
|
||||
/// column selection. An error is returned if the specified columns do not
|
||||
/// exist.
|
||||
|
|
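// Sketch of how a caller might use the new `table_time_range` for pruning:
// compare the returned (min, max) timestamps against a query's time bounds.
// The overlap test is written inline for illustration; the database layer
// would normally go through `TimestampRange` instead.
use read_buffer::Chunk;

/// True if `table_name` in `chunk` could contain rows in the half-open
/// query range [query_min, query_max).
fn chunk_may_match(chunk: &Chunk, table_name: &str, query_min: i64, query_max: i64) -> bool {
    match chunk.table_time_range(table_name) {
        // Table has an InfluxDB timestamp column: prune on its (min, max).
        Ok(Some((min, max))) => min < query_max && max >= query_min,
        // No timestamp column: a time-bounded query cannot match this table.
        Ok(None) => false,
        // Unknown table: let the caller surface the error; stay conservative.
        Err(_) => true,
    }
}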
|
@ -13,12 +13,15 @@ use snafu::{ensure, Snafu};
|
|||
|
||||
use crate::row_group::{self, ColumnName, Predicate, RowGroup};
|
||||
use crate::schema::{AggregateType, ColumnType, LogicalDataType, ResultSchema};
|
||||
use crate::value::Value;
|
||||
use crate::value::{OwnedValue, Scalar, Value};
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("cannot drop last row group in table; drop table"))]
|
||||
EmptyTableError {},
|
||||
|
||||
#[snafu(display("table does not have InfluxDB timestamp column"))]
|
||||
NoTimestampColumnError {},
|
||||
|
||||
#[snafu(display("unsupported column operation on {}: {}", column_name, msg))]
|
||||
UnsupportedColumnOperation { msg: String, column_name: String },
|
||||
}
|
||||
|
@ -151,9 +154,38 @@ impl Table {
|
|||
self.table_data.read().unwrap().meta.to_summary(&self.name)
|
||||
}
|
||||
|
||||
/// The time range of all row groups within this table.
|
||||
/// Returns the column range associated with an InfluxDB Timestamp column
|
||||
/// or None if the table's schema does not have such a column.
|
||||
pub fn time_range(&self) -> Option<(i64, i64)> {
|
||||
self.table_data.read().unwrap().meta.time_range
|
||||
let table_data = self.table_data.read().unwrap();
|
||||
|
||||
let time_column = table_data
|
||||
.meta
|
||||
.columns
|
||||
.values()
|
||||
.filter(|cm| matches!(cm.typ, crate::schema::ColumnType::Timestamp(_)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if time_column.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
assert_eq!(time_column.len(), 1); // can only be one timestamp column.
|
||||
let range = &time_column[0].range;
|
||||
|
||||
let (min, max) = match (&range.0, &range.1) {
|
||||
(OwnedValue::Scalar(Scalar::I64(min)), OwnedValue::Scalar(Scalar::I64(max))) => {
|
||||
(min, max)
|
||||
}
|
||||
(min, max) => {
|
||||
panic!(
|
||||
"invalid range type for timestamp column: ({:?}, {:?})",
|
||||
min, max
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
Some((*min, *max))
|
||||
}
|
||||
|
||||
// Helper function used in tests.
|
||||
|
@ -612,7 +644,6 @@ impl MetaData {
|
|||
}
|
||||
|
||||
pub fn to_summary(&self, table_name: impl Into<String>) -> TableSummary {
|
||||
use crate::value::{OwnedValue, Scalar};
|
||||
use data_types::partition_metadata::{ColumnSummary, StatValues, Statistics};
|
||||
let columns = self
|
||||
.columns
|
||||
|
@ -1435,4 +1466,20 @@ west,host-b,100
|
|||
vec!["time".to_owned()],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn time_range() {
|
||||
// Build a row group.
|
||||
let mut columns = vec![];
|
||||
let tc = ColumnType::Time(Column::from(&[-29_i64, -100, 3, 2][..]));
|
||||
columns.push((row_group::TIME_COLUMN_NAME.to_string(), tc));
|
||||
|
||||
let rc = ColumnType::Tag(Column::from(&["west", "south", "north", "west"][..]));
|
||||
columns.push(("region".to_string(), rc));
|
||||
|
||||
let rg = RowGroup::new(4, columns);
|
||||
let table = Table::new("cpu".to_owned(), rg);
|
||||
|
||||
assert_eq!(table.time_range().unwrap(), (-100, 3));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,11 +14,12 @@ data_types = { path = "../data_types" }
|
|||
# See docs/regenerating_flatbuffers.md about updating generated code when updating the
|
||||
# version of the flatbuffers crate
|
||||
flatbuffers = "0.8"
|
||||
futures = "0.3.7"
|
||||
futures = "0.3"
|
||||
generated_types = { path = "../generated_types" }
|
||||
influxdb_line_protocol = { path = "../influxdb_line_protocol" }
|
||||
internal_types = { path = "../internal_types" }
|
||||
mutable_buffer = { path = "../mutable_buffer" }
|
||||
num_cpus = "1.13.0"
|
||||
object_store = { path = "../object_store" }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
parking_lot = "0.11.1"
|
||||
|
@ -35,4 +36,12 @@ tracker = { path = "../tracker" }
|
|||
uuid = { version = "0.8", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
criterion = { version = "0.3.4", features = ["async_tokio"] }
|
||||
flate2 = "1.0.20"
|
||||
tempfile = "3.1.0"
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
||||
[[bench]]
|
||||
name = "influxrpc"
|
||||
harness = false
|
||||
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
mod tag_values;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
|
||||
use tag_values::benchmark_tag_values;
|
||||
|
||||
criterion_group!(benches, benchmark_tag_values);
|
||||
criterion_main!(benches);
|
|
@ -0,0 +1,122 @@
|
|||
use std::io::Read;
|
||||
|
||||
use arrow_deps::datafusion::{logical_plan::Expr, scalar::ScalarValue};
|
||||
use criterion::{BenchmarkId, Criterion};
|
||||
// These benchmarks drive async execution with a Tokio `Runtime` via
|
||||
// Criterion's `async_tokio` support (`b.to_async`)
|
||||
use flate2::read::GzDecoder;
|
||||
use tokio::runtime::Runtime;
|
||||
|
||||
use query::frontend::influxrpc::InfluxRPCPlanner;
|
||||
use query::predicate::PredicateBuilder;
|
||||
use query::{exec::Executor, predicate::Predicate};
|
||||
use server::{benchmarks::scenarios::DBScenario, db::Db};
|
||||
|
||||
// Uses the `server::benchmarks::scenarios` module to generate some chunk scenarios, specifically
|
||||
// the scenarios where there are:
|
||||
//
|
||||
// - a single open mutable buffer chunk;
|
||||
// - a closed mutable buffer chunk and another open one;
|
||||
// - an open mutable buffer chunk and a closed read buffer chunk;
|
||||
// - two closed read buffer chunks.
|
||||
//
|
||||
// The chunks are all fed the *same* line protocol, so these benchmarks are
|
||||
// useful for assessing the differences in performance between querying the
|
||||
// chunks held in different execution engines.
|
||||
//
|
||||
// These benchmarks use a synthetically generated set of line protocol using
|
||||
// `inch`. Each point is a new series containing three tag keys. Those tag keys
|
||||
// are:
|
||||
//
|
||||
// - tag0, cardinality 10.
|
||||
// - tag1, cardinality 100.
|
||||
// - tag2, cardinality 1,000.
|
||||
//
|
||||
// The timespan of the points in the line protocol is around 1m of wall-clock
|
||||
// time.
|
||||
async fn setup_scenarios() -> Vec<DBScenario> {
|
||||
let raw = include_bytes!("../../tests/fixtures/lineproto/tag_values.lp.gz");
|
||||
let mut gz = GzDecoder::new(&raw[..]);
|
||||
let mut lp = String::new();
|
||||
gz.read_to_string(&mut lp).unwrap();
|
||||
|
||||
let db =
|
||||
server::benchmarks::scenarios::make_two_chunk_scenarios("2021-04-12T17", &lp, &lp).await;
|
||||
db
|
||||
}
|
||||
|
||||
// Run all benchmarks for `tag_values`.
|
||||
pub fn benchmark_tag_values(c: &mut Criterion) {
|
||||
let scenarios = Runtime::new().unwrap().block_on(setup_scenarios());
|
||||
|
||||
execute_benchmark_group(c, scenarios.as_slice());
|
||||
}
|
||||
|
||||
// Runs an async criterion benchmark against the provided scenarios and
|
||||
// predicate.
|
||||
fn execute_benchmark_group(c: &mut Criterion, scenarios: &[DBScenario]) {
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
|
||||
let predicates = vec![
|
||||
(PredicateBuilder::default().build(), "no_pred"),
|
||||
(
|
||||
PredicateBuilder::default()
|
||||
.add_expr(
|
||||
Expr::Column("tag2".to_owned()).eq(Expr::Literal(ScalarValue::Utf8(Some(
|
||||
"value321".to_owned(),
|
||||
)))),
|
||||
)
|
||||
.build(),
|
||||
"with_pred",
|
||||
),
|
||||
];
|
||||
|
||||
// these tags have different cardinalities: 10, 100, 1000.
|
||||
let tag_keys = &["tag0", "tag1", "tag2"];
|
||||
|
||||
for scenario in scenarios {
|
||||
let DBScenario { scenario_name, db } = scenario;
|
||||
let mut group = c.benchmark_group(scenario_name);
|
||||
|
||||
for (predicate, pred_name) in &predicates {
|
||||
for tag_key in tag_keys {
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(format!("{}/{}", tag_key, pred_name)),
|
||||
tag_key,
|
||||
|b, &tag_key| {
|
||||
let executor = db.executor();
|
||||
b.to_async(Runtime::new().unwrap()).iter(|| {
|
||||
run_tag_values_query(
|
||||
&planner,
|
||||
executor.as_ref(),
|
||||
db,
|
||||
tag_key,
|
||||
predicate.clone(),
|
||||
)
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
|
||||
// Plans and runs a tag_values query.
|
||||
async fn run_tag_values_query(
|
||||
planner: &InfluxRPCPlanner,
|
||||
executor: &Executor,
|
||||
db: &Db,
|
||||
tag_key: &str,
|
||||
predicate: Predicate,
|
||||
) {
|
||||
let plan = planner
|
||||
.tag_values(db, &tag_key, predicate)
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
assert!(!names.is_empty());
|
||||
}
|
|
@ -9,6 +9,7 @@ use data_types::{
    DatabaseName,
};
use object_store::{path::ObjectStorePath, ObjectStore};
use query::exec::Executor;

/// This module contains code for managing the configuration of the server.
use crate::{db::Db, Error, JobRegistry, Result};

@ -114,7 +115,13 @@ impl Config {
        state.remotes.remove(&id)
    }

    fn commit(&self, rules: DatabaseRules, server_id: NonZeroU32, object_store: Arc<ObjectStore>) {
    fn commit(
        &self,
        rules: DatabaseRules,
        server_id: NonZeroU32,
        object_store: Arc<ObjectStore>,
        exec: Arc<Executor>,
    ) {
        let mut state = self.state.write().expect("mutex poisoned");
        let name = state
            .reservations

@ -131,6 +138,7 @@ impl Config {
            rules,
            server_id,
            object_store,
            exec,
            wal_buffer,
            Arc::clone(&self.jobs),
        ));

@ -253,9 +261,14 @@ pub(crate) struct CreateDatabaseHandle<'a> {
}

impl<'a> CreateDatabaseHandle<'a> {
    pub(crate) fn commit(mut self, server_id: NonZeroU32, object_store: Arc<ObjectStore>) {
    pub(crate) fn commit(
        mut self,
        server_id: NonZeroU32,
        object_store: Arc<ObjectStore>,
        exec: Arc<Executor>,
    ) {
        self.config
            .commit(self.rules.take().unwrap(), server_id, object_store)
            .commit(self.rules.take().unwrap(), server_id, object_store, exec)
    }

    pub(crate) fn rules(&self) -> &DatabaseRules {

@ -292,7 +305,8 @@ mod test {
        let db_reservation = config.create_db(rules).unwrap();
        let server_id = NonZeroU32::new(1).unwrap();
        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
        db_reservation.commit(server_id, store);
        let exec = Arc::new(Executor::new(1));
        db_reservation.commit(server_id, store, exec);
        assert!(config.db(&name).is_some());
        assert_eq!(config.db_names_sorted(), vec![name.clone()]);

@ -318,7 +332,8 @@ mod test {
        let db_reservation = config.create_db(rules).unwrap();
        let server_id = NonZeroU32::new(1).unwrap();
        let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
        db_reservation.commit(server_id, store);
        let exec = Arc::new(Executor::new(1));
        db_reservation.commit(server_id, store, exec);

        let token = config
            .state
647 server/src/db.rs
@ -3,6 +3,7 @@

use std::any::Any;
use std::{
    convert::TryInto,
    num::NonZeroU32,
    sync::{
        atomic::{AtomicU64, AtomicUsize, Ordering},

@ -15,20 +16,24 @@ use observability_deps::tracing::{debug, info};
use parking_lot::{Mutex, RwLock};
use snafu::{ensure, OptionExt, ResultExt, Snafu};

use arrow_deps::datafusion::{
    catalog::{catalog::CatalogProvider, schema::SchemaProvider},
    physical_plan::SendableRecordBatchStream,
use arrow_deps::{
    arrow::datatypes::SchemaRef as ArrowSchemaRef,
    datafusion::{
        catalog::{catalog::CatalogProvider, schema::SchemaProvider},
        physical_plan::SendableRecordBatchStream,
    },
};

use catalog::{chunk::ChunkState, Catalog};
pub(crate) use chunk::DBChunk;
use data_types::{
    chunk::ChunkSummary, database_rules::DatabaseRules, partition_metadata::PartitionSummary,
    timestamp::TimestampRange,
};
use internal_types::{data::ReplicatedWrite, selection::Selection};
use internal_types::selection::Selection;
use object_store::ObjectStore;
use parquet_file::{chunk::Chunk, storage::Storage};
use query::{Database, DEFAULT_SCHEMA};
use query::{exec::Executor, Database, DEFAULT_SCHEMA};
use read_buffer::Chunk as ReadBufferChunk;
use tracker::{MemRegistry, TaskTracker, TrackedFutureExt};

@ -36,6 +41,7 @@ use super::{buffer::Buffer, JobRegistry};
use data_types::job::Job;

use data_types::partition_metadata::TableSummary;
use internal_types::entry::{self, ClockValue, Entry, SequencedEntry};
use lifecycle::LifecycleManager;
use system_tables::{SystemSchemaProvider, SYSTEM_SCHEMA};

@ -114,6 +120,18 @@ pub enum Error {
        chunk_id: u32,
    },

    #[snafu(display("Read Buffer Schema Error in chunk {}: {}", chunk_id, source))]
    ReadBufferChunkSchemaError {
        source: read_buffer::Error,
        chunk_id: u32,
    },

    #[snafu(display("Read Buffer Timestamp Error in chunk {}: {}", chunk_id, source))]
    ReadBufferChunkTimestampError {
        chunk_id: u32,
        source: read_buffer::Error,
    },

    #[snafu(display("Error writing to object store: {}", source))]
    WritingToObjectStore {
        source: parquet_file::storage::Error,

@ -131,6 +149,14 @@ pub enum Error {
        chunk_id: u32,
        source: mutable_buffer::chunk::Error,
    },

    #[snafu(display("Error building sequenced entry: {}", source))]
    SequencedEntryError { source: entry::Error },

    #[snafu(display("Error building sequenced entry: {}", source))]
    SchemaConversion {
        source: internal_types::schema::Error,
    },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
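As an aside, the new error variants above rely on snafu's derive and context-selector pattern. A minimal sketch of how such a variant is declared and attached to a fallible call, assuming snafu 0.6-style selectors as used in this file (the `ReadChunk` variant, file name, and `load` function are illustrative only):

use snafu::{ResultExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Error reading chunk {}: {}", chunk_id, source))]
    ReadChunk {
        chunk_id: u32,
        source: std::io::Error,
    },
}

type Result<T, E = Error> = std::result::Result<T, E>;

fn load(chunk_id: u32) -> Result<Vec<u8>> {
    // `.context(...)` wraps the io::Error into Error::ReadChunk, filling in chunk_id.
    std::fs::read("chunk.bin").context(ReadChunk { chunk_id })
}

fn main() {
    match load(42) {
        Ok(bytes) => println!("read {} bytes", bytes.len()),
        Err(e) => eprintln!("{}", e),
    }
}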
@ -197,8 +223,12 @@ pub struct Db {

    pub server_id: NonZeroU32, // this is also the Query Server ID

    /// Interface to use for persistence
    pub store: Arc<ObjectStore>,

    /// Executor for running queries
    exec: Arc<Executor>,

    /// The catalog holds chunks of data under partitions for the database.
    /// The underlying chunks may be backed by different execution engines
    /// depending on their stage in the data lifecycle. Currently there are

@ -245,6 +275,7 @@ impl Db {
        rules: DatabaseRules,
        server_id: NonZeroU32,
        object_store: Arc<ObjectStore>,
        exec: Arc<Executor>,
        wal_buffer: Option<Buffer>,
        jobs: Arc<JobRegistry>,
    ) -> Self {

@ -258,6 +289,7 @@ impl Db {
            rules,
            server_id,
            store,
            exec,
            catalog,
            wal_buffer,
            jobs,

@ -268,6 +300,11 @@ impl Db {
        }
    }

    /// Return a handle to the executor used to run queries
    pub fn executor(&self) -> Arc<Executor> {
        Arc::clone(&self.exec)
    }

    /// Rolls over the active chunk in the database's specified
    /// partition. Returns the previously open (now closed) Chunk
    pub async fn rollover_partition(&self, partition_key: &str) -> Result<Arc<DBChunk>> {
@ -421,7 +458,7 @@ impl Db {
|
|||
Ok(DBChunk::snapshot(&chunk))
|
||||
}
|
||||
|
||||
pub async fn load_chunk_to_object_store(
|
||||
pub async fn write_chunk_to_object_store(
|
||||
&self,
|
||||
partition_key: &str,
|
||||
chunk_id: u32,
|
||||
|
@ -480,17 +517,19 @@ impl Db {
|
|||
let predicate = read_buffer::Predicate::default();
|
||||
|
||||
// Get RecordBatchStream of data from the read buffer chunk
|
||||
// TODO: When we have the rb_chunk, the following code will be replaced with one
|
||||
// line let stream = rb_chunk.read_filter()
|
||||
let read_results = rb_chunk
|
||||
.read_filter(stats.name.as_str(), predicate, Selection::All)
|
||||
.context(ReadBufferChunkError { chunk_id })?;
|
||||
let schema = rb_chunk
|
||||
let arrow_schema: ArrowSchemaRef = rb_chunk
|
||||
.read_filter_table_schema(stats.name.as_str(), Selection::All)
|
||||
.context(ReadBufferChunkError { chunk_id })?
|
||||
.context(ReadBufferChunkSchemaError { chunk_id })?
|
||||
.into();
|
||||
let stream: SendableRecordBatchStream =
|
||||
Box::pin(streams::ReadFilterResultsStream::new(read_results, schema));
|
||||
let time_range = rb_chunk
|
||||
.table_time_range(stats.name.as_str())
|
||||
.context(ReadBufferChunkTimestampError { chunk_id })?;
|
||||
let stream: SendableRecordBatchStream = Box::pin(
|
||||
streams::ReadFilterResultsStream::new(read_results, Arc::clone(&arrow_schema)),
|
||||
);
|
||||
|
||||
// Write this table data into the object store
|
||||
let path = storage
|
||||
|
@ -504,7 +543,20 @@ impl Db {
|
|||
.context(WritingToObjectStore)?;
|
||||
|
||||
// Now add the saved info into the parquet_chunk
|
||||
parquet_chunk.add_table(stats, path);
|
||||
let schema = Arc::clone(&arrow_schema)
|
||||
.try_into()
|
||||
.context(SchemaConversion)?;
|
||||
let table_time_range = match time_range {
|
||||
None => None,
|
||||
Some((start, end)) => {
|
||||
if start < end {
|
||||
Some(TimestampRange::new(start, end))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
parquet_chunk.add_table(stats, path, schema, table_time_range);
|
||||
}
|
||||
|
||||
// Relock the chunk again (nothing else should have been able
|
||||
|
@ -524,7 +576,8 @@ impl Db {
|
|||
Ok(DBChunk::snapshot(&chunk))
|
||||
}
|
||||
|
||||
/// Spawns a task to perform load_chunk_to_read_buffer
|
||||
/// Spawns a task to perform
|
||||
/// [`load_chunk_to_read_buffer`](Self::load_chunk_to_read_buffer)
|
||||
pub fn load_chunk_to_read_buffer_in_background(
|
||||
self: &Arc<Self>,
|
||||
partition_key: String,
|
||||
|
@ -558,6 +611,41 @@ impl Db {
|
|||
tracker
|
||||
}
|
||||
|
||||
/// Spawns a task to perform
|
||||
/// [`write_chunk_to_object_store`](Self::write_chunk_to_object_store)
|
||||
pub fn write_chunk_to_object_store_in_background(
|
||||
self: &Arc<Self>,
|
||||
partition_key: String,
|
||||
chunk_id: u32,
|
||||
) -> TaskTracker<Job> {
|
||||
let name = self.rules.read().name.clone();
|
||||
let (tracker, registration) = self.jobs.register(Job::WriteChunk {
|
||||
db_name: name.to_string(),
|
||||
partition_key: partition_key.clone(),
|
||||
chunk_id,
|
||||
});
|
||||
|
||||
let captured = Arc::clone(&self);
|
||||
let task = async move {
|
||||
debug!(%name, %partition_key, %chunk_id, "background task loading chunk to object store");
|
||||
let result = captured
|
||||
.write_chunk_to_object_store(&partition_key, chunk_id)
|
||||
.await;
|
||||
if let Err(e) = result {
|
||||
info!(?e, %name, %partition_key, %chunk_id, "background task error loading object store chunk");
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
debug!(%name, %partition_key, %chunk_id, "background task completed writing chunk to object store");
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
tokio::spawn(task.track(registration));
|
||||
|
||||
tracker
|
||||
}
|
||||
|
||||
/// Returns the next write sequence number
|
||||
pub fn next_sequence(&self) -> u64 {
|
||||
self.sequence.fetch_add(1, Ordering::SeqCst)
|
||||
|
@ -624,6 +712,79 @@ impl Db {
|
|||
|
||||
info!("finished background worker");
|
||||
}
|
||||
|
||||
/// Stores an entry based on the configuration. The Entry will first be
|
||||
/// converted into a Sequenced Entry with the logical clock assigned
|
||||
/// from the database. If the write buffer is configured, the sequenced
|
||||
/// entry is written into the buffer and replicated based on the
|
||||
/// configured rules. If the mutable buffer is configured, the sequenced
|
||||
/// entry is then written into the mutable buffer.
|
||||
pub fn store_entry(&self, entry: Entry) -> Result<()> {
|
||||
// TODO: build this based on either this or on the write buffer, if configured
|
||||
let sequenced_entry = SequencedEntry::new_from_entry_bytes(
|
||||
ClockValue::new(self.next_sequence()),
|
||||
self.server_id.get(),
|
||||
entry.data(),
|
||||
)
|
||||
.context(SequencedEntryError)?;
|
||||
|
||||
if self.rules.read().wal_buffer_config.is_some() {
|
||||
todo!("route to the Write Buffer. TODO: carols10cents #1157")
|
||||
}
|
||||
|
||||
self.store_sequenced_entry(sequenced_entry)
|
||||
}
|
||||
|
||||
pub fn store_sequenced_entry(&self, sequenced_entry: SequencedEntry) -> Result<()> {
|
||||
let rules = self.rules.read();
|
||||
let mutable_size_threshold = rules.lifecycle_rules.mutable_size_threshold;
|
||||
if rules.lifecycle_rules.immutable {
|
||||
return DatabaseNotWriteable {}.fail();
|
||||
}
|
||||
std::mem::drop(rules);
|
||||
|
||||
// TODO: Direct writes to closing chunks
|
||||
|
||||
if let Some(partitioned_writes) = sequenced_entry.partition_writes() {
|
||||
for write in partitioned_writes {
|
||||
let partition_key = write.key();
|
||||
let partition = self.catalog.get_or_create_partition(partition_key);
|
||||
let mut partition = partition.write();
|
||||
partition.update_last_write_at();
|
||||
|
||||
let chunk = partition.open_chunk().unwrap_or_else(|| {
|
||||
partition.create_open_chunk(self.memory_registries.mutable_buffer.as_ref())
|
||||
});
|
||||
|
||||
let mut chunk = chunk.write();
|
||||
chunk.record_write();
|
||||
let chunk_id = chunk.id();
|
||||
|
||||
let mb_chunk = chunk.mutable_buffer().expect("cannot mutate open chunk");
|
||||
|
||||
mb_chunk
|
||||
.write_table_batches(
|
||||
sequenced_entry.clock_value(),
|
||||
sequenced_entry.writer_id(),
|
||||
&write.table_batches(),
|
||||
)
|
||||
.context(WriteEntry {
|
||||
partition_key,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let size = mb_chunk.size();
|
||||
|
||||
if let Some(threshold) = mutable_size_threshold {
|
||||
if size > threshold.get() {
|
||||
chunk.set_closing().expect("cannot close open chunk")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
@ -652,54 +813,6 @@ impl Database for Db {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn store_replicated_write(&self, write: &ReplicatedWrite) -> Result<(), Self::Error> {
|
||||
let rules = self.rules.read();
|
||||
let mutable_size_threshold = rules.lifecycle_rules.mutable_size_threshold;
|
||||
if rules.lifecycle_rules.immutable {
|
||||
return DatabaseNotWriteable {}.fail();
|
||||
}
|
||||
std::mem::drop(rules);
|
||||
|
||||
let entries = match write.write_buffer_batch().and_then(|batch| batch.entries()) {
|
||||
Some(entries) => entries,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
// TODO: Direct writes to closing chunks
|
||||
|
||||
for entry in entries.into_iter() {
|
||||
if let Some(partition_key) = entry.partition_key() {
|
||||
let partition = self.catalog.get_or_create_partition(partition_key);
|
||||
let mut partition = partition.write();
|
||||
partition.update_last_write_at();
|
||||
|
||||
let chunk = partition.open_chunk().unwrap_or_else(|| {
|
||||
partition.create_open_chunk(self.memory_registries.mutable_buffer.as_ref())
|
||||
});
|
||||
|
||||
let mut chunk = chunk.write();
|
||||
chunk.record_write();
|
||||
let chunk_id = chunk.id();
|
||||
|
||||
let mb_chunk = chunk.mutable_buffer().expect("cannot mutate open chunk");
|
||||
|
||||
mb_chunk.write_entry(&entry).context(WriteEntry {
|
||||
partition_key,
|
||||
chunk_id,
|
||||
})?;
|
||||
|
||||
let size = mb_chunk.size();
|
||||
|
||||
if let Some(threshold) = mutable_size_threshold {
|
||||
if size > threshold.get() {
|
||||
chunk.set_closing().expect("cannot close open chunk")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn partition_keys(&self) -> Result<Vec<String>, Self::Error> {
|
||||
Ok(self.catalog.partition_keys())
|
||||
}
|
||||
|
@ -731,10 +844,25 @@ impl CatalogProvider for Db {
|
|||
}
|
||||
}
|
||||
|
||||
pub mod test_helpers {
|
||||
use super::*;
|
||||
use internal_types::entry::test_helpers::lp_to_entries;
|
||||
|
||||
pub fn write_lp(db: &Db, lp: &str) {
|
||||
let entries = lp_to_entries(lp);
|
||||
for entry in entries {
|
||||
db.store_entry(entry).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::query_tests::utils::{make_database, make_db};
|
||||
use ::test_helpers::assert_contains;
|
||||
use arrow_deps::{
|
||||
arrow::record_batch::RecordBatch, assert_table_eq, datafusion::physical_plan::collect,
|
||||
arrow::record_batch::RecordBatch, assert_batches_sorted_eq, assert_table_eq,
|
||||
datafusion::execution::context,
|
||||
};
|
||||
use chrono::Utc;
|
||||
use data_types::{
|
||||
|
@ -742,23 +870,32 @@ mod tests {
|
|||
database_rules::{Order, Sort, SortOrder},
|
||||
partition_metadata::{ColumnSummary, StatValues, Statistics, TableSummary},
|
||||
};
|
||||
use query::{
|
||||
exec::Executor, frontend::sql::SQLQueryPlanner, test::TestLPWriter, PartitionChunk,
|
||||
use object_store::{
|
||||
disk::File, path::ObjectStorePath, path::Path, ObjectStore, ObjectStoreApi,
|
||||
};
|
||||
use test_helpers::assert_contains;
|
||||
|
||||
use crate::query_tests::utils::make_db;
|
||||
use query::{frontend::sql::SQLQueryPlanner, PartitionChunk};
|
||||
|
||||
use super::*;
|
||||
use futures::stream;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use std::iter::Iterator;
|
||||
|
||||
use super::test_helpers::write_lp;
|
||||
use internal_types::entry::test_helpers::lp_to_entry;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::str;
|
||||
use tempfile::TempDir;
|
||||
|
||||
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_no_mutable_buffer() {
|
||||
// Validate that writes are rejected if there is no mutable buffer
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
db.rules.write().lifecycle_rules.immutable = true;
|
||||
let res = writer.write_lp_string(&db, "cpu bar=1 10");
|
||||
let entry = lp_to_entry("cpu bar=1 10");
|
||||
let res = db.store_entry(entry);
|
||||
assert_contains!(
|
||||
res.unwrap_err().to_string(),
|
||||
"Cannot write to this database: no mutable buffer configured"
|
||||
|
@ -768,8 +905,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn read_write() {
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
|
||||
let batches = run_query(db, "select * from cpu").await;
|
||||
|
||||
|
@ -786,9 +922,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn write_with_rollover() {
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
//writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
assert_eq!(vec!["1970-01-01T00"], db.partition_keys().unwrap());
|
||||
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
@ -802,10 +936,10 @@ mod tests {
|
|||
"+-----+------+",
|
||||
];
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_table_eq!(expected, &batches);
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
|
||||
// add new data
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=2 20").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
|
@ -815,20 +949,19 @@ mod tests {
|
|||
"+-----+------+",
|
||||
];
|
||||
let batches = run_query(Arc::clone(&db), "select * from cpu").await;
|
||||
assert_table_eq!(&expected, &batches);
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
|
||||
// And expect that we still get the same thing when data is rolled over again
|
||||
let chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
assert_eq!(chunk.id(), 1);
|
||||
|
||||
let batches = run_query(db, "select * from cpu").await;
|
||||
assert_table_eq!(&expected, &batches);
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_with_missing_tags_are_null() {
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
// Note the `region` tag is introduced in the second line, so
|
||||
// the values in prior rows for the region column are
|
||||
// null. Likewise the `core` tag is introduced in the third
|
||||
|
@ -839,9 +972,7 @@ mod tests {
|
|||
"cpu,core=one user=10.0 11",
|
||||
];
|
||||
|
||||
writer
|
||||
.write_lp_string(db.as_ref(), &lines.join("\n"))
|
||||
.unwrap();
|
||||
write_lp(db.as_ref(), &lines.join("\n"));
|
||||
assert_eq!(vec!["1970-01-01T00"], db.partition_keys().unwrap());
|
||||
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
@ -864,12 +995,11 @@ mod tests {
|
|||
async fn read_from_read_buffer() {
|
||||
// Test that data can be loaded into the ReadBuffer
|
||||
let db = Arc::new(make_db());
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(db.as_ref(), "cpu bar=2 20").unwrap();
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
let mb_chunk = db.rollover_partition(partition_key).await.unwrap();
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
|
@ -909,14 +1039,221 @@ mod tests {
|
|||
// cpu").await; assert_table_eq!(expected, &batches);
|
||||
}
|
||||
|
||||
async fn flatten_list_stream(
|
||||
storage: Arc<ObjectStore>,
|
||||
prefix: Option<&Path>,
|
||||
) -> Result<Vec<Path>> {
|
||||
storage
|
||||
.list(prefix)
|
||||
.await?
|
||||
.map_ok(|v| stream::iter(v).map(Ok))
|
||||
.try_flatten()
|
||||
.try_collect()
|
||||
.await
|
||||
}
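The `flatten_list_stream` helper above leans on futures' `TryStreamExt` combinators to turn a paged listing into one flat collection. A standalone sketch of the same flattening pattern, using plain vectors in place of object-store paths (only the `futures` crate is assumed; names are illustrative):

use futures::executor::block_on;
use futures::stream::{self, StreamExt, TryStreamExt};

fn main() -> Result<(), std::io::Error> {
    // A paged listing, analogous to ObjectStore::list: each item is one page of results.
    let pages = stream::iter(vec![Ok::<_, std::io::Error>(vec![1u32, 2]), Ok(vec![3])]);

    // Flatten the pages into a single Vec, propagating the first error if any.
    let flat: Vec<u32> = block_on(
        pages
            .map_ok(|page| stream::iter(page).map(Ok))
            .try_flatten()
            .try_collect(),
    )?;

    assert_eq!(flat, vec![1, 2, 3]);
    Ok(())
}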
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_one_chunk_one_table_to_parquet_file() {
|
||||
// Test that data can be written into parquet files
|
||||
|
||||
// Create an object store with a specified location in a local disk
|
||||
let root = TempDir::new().unwrap();
|
||||
let object_store = Arc::new(ObjectStore::new_file(File::new(root.path())));
|
||||
|
||||
// Create a DB given a server id, an object store and a db name
|
||||
let server_id: NonZeroU32 = NonZeroU32::new(10).unwrap();
|
||||
let db_name = "parquet_test_db";
|
||||
let db = Arc::new(make_database(server_id, Arc::clone(&object_store), db_name));
|
||||
|
||||
// Write some line protocols in Mutable buffer of the DB
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
// Now mark the MB chunk closed
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
// Move that MB chunk to RB chunk and drop it from MB
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
// Write the RB chunk to Object Store but keep it in RB
|
||||
let pq_chunk = db
|
||||
.write_chunk_to_object_store(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// it should be the same chunk!
|
||||
assert_eq!(mb_chunk.id(), rb_chunk.id());
|
||||
assert_eq!(mb_chunk.id(), pq_chunk.id());
|
||||
|
||||
// we should have chunks in the mutable buffer, read buffer, and object store
|
||||
// (Note the currently open chunk is not listed)
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![1]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(read_parquet_file_chunk_ids(&db, partition_key), vec![0]);
|
||||
|
||||
// Verify data written to the parquet file in object store
|
||||
// First, there must be one path of object store in the catalog
|
||||
let paths = pq_chunk.object_store_paths();
|
||||
assert_eq!(paths.len(), 1);
|
||||
|
||||
// Check that the path must exist in the object store
|
||||
let path_list = flatten_list_stream(Arc::clone(&object_store), Some(&paths[0]))
|
||||
.await
|
||||
.unwrap();
|
||||
println!("path_list: {:#?}", path_list);
|
||||
assert_eq!(path_list.len(), 1);
|
||||
assert_eq!(path_list, paths.clone());
|
||||
|
||||
// Get full string path
|
||||
let root_path = format!("{:?}", root.path());
|
||||
let root_path = root_path.trim_matches('"');
|
||||
let path = format!("{}/{}", root_path, paths[0].display());
|
||||
println!("path: {}", path);
|
||||
|
||||
// Create External table of this parquet file to get its content in a human
|
||||
// readable form
|
||||
// Note: We do not care about escaping quotes here because it is just a test
|
||||
let sql = format!(
|
||||
"CREATE EXTERNAL TABLE parquet_table STORED AS PARQUET LOCATION '{}'",
|
||||
path
|
||||
);
|
||||
|
||||
let mut ctx = context::ExecutionContext::new();
|
||||
let df = ctx.sql(&sql).unwrap();
|
||||
df.collect().await.unwrap();
|
||||
|
||||
// Select data from that table
|
||||
let sql = "SELECT * FROM parquet_table";
|
||||
let content = ctx.sql(&sql).unwrap().collect().await.unwrap();
|
||||
println!("Content: {:?}", content);
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 10 |",
|
||||
"| 2 | 20 |",
|
||||
"+-----+------+",
|
||||
];
|
||||
assert_table_eq!(expected, &content);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_one_chunk_many_tables_to_parquet_files() {
|
||||
// Test that data can be written into parquet files
|
||||
|
||||
// Create an object store with a specified location in a local disk
|
||||
let root = TempDir::new().unwrap();
|
||||
let object_store = Arc::new(ObjectStore::new_file(File::new(root.path())));
|
||||
|
||||
// Create a DB given a server id, an object store and a db name
|
||||
let server_id: NonZeroU32 = NonZeroU32::new(10).unwrap();
|
||||
let db_name = "parquet_test_db";
|
||||
let db = Arc::new(make_database(server_id, Arc::clone(&object_store), db_name));
|
||||
|
||||
// Write some line protocols in Mutable buffer of the DB
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "disk ops=1 20");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
// Now mark the MB chunk closed
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
// Move that MB chunk to RB chunk and drop it from MB
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
// Write the RB chunk to Object Store but keep it in RB
|
||||
let pq_chunk = db
|
||||
.write_chunk_to_object_store(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// it should be the same chunk!
|
||||
assert_eq!(mb_chunk.id(), rb_chunk.id());
|
||||
assert_eq!(mb_chunk.id(), pq_chunk.id());
|
||||
|
||||
// we should have chunks in the mutable buffer, read buffer, and object store
|
||||
// (Note the currently open chunk is not listed)
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![1]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(read_parquet_file_chunk_ids(&db, partition_key), vec![0]);
|
||||
|
||||
// Verify data written to the parquet files in object store
|
||||
// First, there must be 2 paths of object store in the catalog
|
||||
// that represents 2 files
|
||||
let paths = pq_chunk.object_store_paths();
|
||||
assert_eq!(paths.len(), 2);
|
||||
|
||||
// Check that the path must exist in the object store
|
||||
let prefix = object_store.new_path();
|
||||
let path_list = flatten_list_stream(Arc::clone(&object_store), Some(&prefix))
|
||||
.await
|
||||
.unwrap();
|
||||
println!("path_list: {:#?}", path_list);
|
||||
assert_eq!(path_list.len(), 2);
|
||||
|
||||
// Check the content of each path
|
||||
//
|
||||
// Root path
|
||||
let root_path = format!("{:?}", root.path());
|
||||
let root_path = root_path.trim_matches('"');
|
||||
|
||||
for path in path_list {
|
||||
// Get full string path
|
||||
let path_string = format!("{}/{}", root_path, path.display());
|
||||
println!("path: {}", path_string);
|
||||
|
||||
// Create External table of this parquet file to get its content in a human
|
||||
// readable form
|
||||
// Note: We do not care about escaping quotes here because it is just a test
|
||||
let sql = format!(
|
||||
"CREATE EXTERNAL TABLE parquet_table STORED AS PARQUET LOCATION '{}'",
|
||||
path_string
|
||||
);
|
||||
|
||||
let mut ctx = context::ExecutionContext::new();
|
||||
let df = ctx.sql(&sql).unwrap();
|
||||
df.collect().await.unwrap();
|
||||
|
||||
// Select data from that table
|
||||
let sql = "SELECT * FROM parquet_table";
|
||||
let content = ctx.sql(&sql).unwrap().collect().await.unwrap();
|
||||
println!("Content: {:?}", content);
|
||||
let expected = if path_string.contains("cpu") {
|
||||
// file name: cpu.parquet
|
||||
vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 10 |",
|
||||
"| 2 | 20 |",
|
||||
"+-----+------+",
|
||||
]
|
||||
} else {
|
||||
// file name: disk.parquet
|
||||
vec![
|
||||
"+-----+------+",
|
||||
"| ops | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 20 |",
|
||||
"+-----+------+",
|
||||
]
|
||||
};
|
||||
|
||||
assert_table_eq!(expected, &content);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_updates_last_write_at() {
|
||||
let db = make_db();
|
||||
let before_create = Utc::now();
|
||||
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
let after_write = Utc::now();
|
||||
|
||||
let last_write_prev = {
|
||||
|
@ -929,7 +1266,7 @@ mod tests {
|
|||
partition.last_write_at()
|
||||
};
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 20").unwrap();
|
||||
write_lp(&db, "cpu bar=1 20");
|
||||
{
|
||||
let partition = db.catalog.valid_partition(partition_key).unwrap();
|
||||
let partition = partition.read();
|
||||
|
@ -943,8 +1280,7 @@ mod tests {
|
|||
let db = make_db();
|
||||
|
||||
// Given data loaded into two chunks
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
let after_data_load = Utc::now();
|
||||
|
||||
// When the chunk is rolled over
|
||||
|
@ -977,9 +1313,8 @@ mod tests {
|
|||
db.rules.write().lifecycle_rules.mutable_size_threshold =
|
||||
Some(NonZeroUsize::new(2).unwrap());
|
||||
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(&db, "cpu bar=1 20").unwrap();
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
write_lp(&db, "cpu bar=1 20");
|
||||
|
||||
let partitions = db.catalog.partition_keys();
|
||||
assert_eq!(partitions.len(), 1);
|
||||
|
@ -996,15 +1331,10 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn chunks_sorted_by_times() {
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu val=1 1").unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "mem val=2 400000000000001")
|
||||
.unwrap();
|
||||
writer.write_lp_string(&db, "cpu val=1 2").unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "mem val=2 400000000000002")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu val=1 1");
|
||||
write_lp(&db, "mem val=2 400000000000001");
|
||||
write_lp(&db, "cpu val=1 2");
|
||||
write_lp(&db, "mem val=2 400000000000002");
|
||||
|
||||
let sort_rules = SortOrder {
|
||||
order: Order::Desc,
|
||||
|
@ -1035,9 +1365,9 @@ mod tests {
|
|||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, "cpu bar=1 10").unwrap();
|
||||
writer.write_lp_string(&db, "cpu bar=1 20").unwrap();
|
||||
|
||||
write_lp(&db, "cpu bar=1 10");
|
||||
write_lp(&db, "cpu bar=1 20");
|
||||
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(
|
||||
|
@ -1051,13 +1381,13 @@ mod tests {
|
|||
|
||||
// add a new chunk in mutable buffer, and move chunk1 (but
|
||||
// not chunk 0) to read buffer
|
||||
writer.write_lp_string(&db, "cpu bar=1 30").unwrap();
|
||||
write_lp(&db, "cpu bar=1 30");
|
||||
let mb_chunk = db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
db.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 40").unwrap();
|
||||
write_lp(&db, "cpu bar=1 40");
|
||||
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![0, 2]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![1]);
|
||||
|
@ -1086,15 +1416,12 @@ mod tests {
|
|||
async fn partition_chunk_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
||||
// write into a separate partition
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1,baz2,frob=3 400000000000000")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1,baz2,frob=3 400000000000000");
|
||||
|
||||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
|
@ -1131,11 +1458,10 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn partition_chunk_summaries_timestamp() {
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let start = Utc::now();
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
let after_first_write = Utc::now();
|
||||
writer.write_lp_string(&db, "cpu bar=2 2").unwrap();
|
||||
write_lp(&db, "cpu bar=2 2");
|
||||
db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
let after_close = Utc::now();
|
||||
|
||||
|
@ -1183,17 +1509,13 @@ mod tests {
|
|||
async fn chunk_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
// get three chunks: one open, one closed in mb and one closed in rb
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
db.rollover_partition("1970-01-01T00").await.unwrap();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1,baz=2 2").unwrap();
|
||||
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1,baz=2,frob=3 400000000000000")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1,baz=2 2");
|
||||
write_lp(&db, "cpu bar=1,baz=2,frob=3 400000000000000");
|
||||
|
||||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
|
@ -1204,9 +1526,7 @@ mod tests {
|
|||
print!("Partitions2: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
db.rollover_partition("1970-01-05T15").await.unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1,baz=3,blargh=3 400000000000000")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1,baz=3,blargh=3 400000000000000");
|
||||
|
||||
fn to_arc(s: &str) -> Arc<String> {
|
||||
Arc::new(s.to_string())
|
||||
|
@ -1256,12 +1576,11 @@ mod tests {
|
|||
async fn partition_summaries() {
|
||||
// Test that chunk id listing is hooked up
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
writer.write_lp_string(&db, "cpu bar=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=1 1");
|
||||
let chunk_id = db.rollover_partition("1970-01-01T00").await.unwrap().id();
|
||||
writer.write_lp_string(&db, "cpu bar=2,baz=3.0 2").unwrap();
|
||||
writer.write_lp_string(&db, "mem foo=1 1").unwrap();
|
||||
write_lp(&db, "cpu bar=2,baz=3.0 2");
|
||||
write_lp(&db, "mem foo=1 1");
|
||||
|
||||
// load a chunk to the read buffer
|
||||
db.load_chunk_to_read_buffer("1970-01-01T00", chunk_id)
|
||||
|
@ -1269,12 +1588,8 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
// write into a separate partition
|
||||
writer
|
||||
.write_lp_string(&db, "cpu bar=1 400000000000000")
|
||||
.unwrap();
|
||||
writer
|
||||
.write_lp_string(&db, "mem frob=3 400000000000001")
|
||||
.unwrap();
|
||||
write_lp(&db, "cpu bar=1 400000000000000");
|
||||
write_lp(&db, "mem frob=3 400000000000001");
|
||||
|
||||
print!("Partitions: {:?}", db.partition_keys().unwrap());
|
||||
|
||||
|
@ -1398,11 +1713,11 @@ mod tests {
|
|||
// run a sql query against the database, returning the results as record batches
|
||||
async fn run_query(db: Arc<Db>, query: &str) -> Vec<RecordBatch> {
|
||||
let planner = SQLQueryPlanner::default();
|
||||
let executor = Executor::new();
|
||||
let executor = db.executor();
|
||||
|
||||
let physical_plan = planner.query(db, query, &executor).await.unwrap();
|
||||
let physical_plan = planner.query(db, query, &executor).unwrap();
|
||||
|
||||
collect(physical_plan).await.unwrap()
|
||||
executor.collect(physical_plan).await.unwrap()
|
||||
}
|
||||
|
||||
fn mutable_chunk_ids(db: &Db, partition_key: &str) -> Vec<u32> {
|
||||
|
@ -1426,10 +1741,62 @@ mod tests {
|
|||
.into_iter()
|
||||
.filter_map(|chunk| match chunk.storage {
|
||||
ChunkStorage::ReadBuffer => Some(chunk.id),
|
||||
ChunkStorage::ReadBufferAndObjectStore => Some(chunk.id),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
chunk_ids.sort_unstable();
|
||||
chunk_ids
|
||||
}
|
||||
|
||||
fn read_parquet_file_chunk_ids(db: &Db, partition_key: &str) -> Vec<u32> {
|
||||
let mut chunk_ids: Vec<u32> = db
|
||||
.partition_chunk_summaries(partition_key)
|
||||
.into_iter()
|
||||
.filter_map(|chunk| match chunk.storage {
|
||||
ChunkStorage::ReadBufferAndObjectStore => Some(chunk.id),
|
||||
ChunkStorage::ObjectStoreOnly => Some(chunk.id),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
chunk_ids.sort_unstable();
|
||||
chunk_ids
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_chunk_to_object_store_in_background() {
|
||||
// Test that data can be written to object store using a background task
|
||||
let db = Arc::new(make_db());
|
||||
|
||||
// create MB partition
|
||||
write_lp(db.as_ref(), "cpu bar=1 10");
|
||||
write_lp(db.as_ref(), "cpu bar=2 20");
|
||||
|
||||
// MB => RB
|
||||
let partition_key = "1970-01-01T00";
|
||||
let mb_chunk = db.rollover_partition(partition_key).await.unwrap();
|
||||
let rb_chunk = db
|
||||
.load_chunk_to_read_buffer(partition_key, mb_chunk.id())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(mb_chunk.id(), rb_chunk.id());
|
||||
|
||||
// RB => OS
|
||||
let task =
|
||||
db.write_chunk_to_object_store_in_background(partition_key.to_string(), rb_chunk.id());
|
||||
let t_start = std::time::Instant::now();
|
||||
while !task.is_complete() {
|
||||
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
|
||||
assert!(
|
||||
std::time::Instant::now() - t_start < std::time::Duration::from_secs(10),
|
||||
"task deadline exceeded"
|
||||
);
|
||||
}
|
||||
|
||||
// we should have chunks in the mutable buffer, read buffer, and object store
|
||||
// (Note the currently open chunk is not listed)
|
||||
assert_eq!(mutable_chunk_ids(&db, partition_key), vec![1]);
|
||||
assert_eq!(read_buffer_chunk_ids(&db, partition_key), vec![0]);
|
||||
assert_eq!(read_parquet_file_chunk_ids(&db, partition_key), vec![0]);
|
||||
}
|
||||
}
|
||||
|
|
@ -171,8 +171,12 @@ impl Chunk {
            ChunkState::Closing(chunk) => (chunk.size(), ChunkStorage::ClosedMutableBuffer),
            ChunkState::Moving(chunk) => (chunk.size(), ChunkStorage::ClosedMutableBuffer),
            ChunkState::Moved(chunk) => (chunk.size(), ChunkStorage::ReadBuffer),
            ChunkState::WritingToObjectStore(chunk) => (chunk.size(), ChunkStorage::ObjectStore),
            ChunkState::WrittenToObjectStore(chunk, _) => (chunk.size(), ChunkStorage::ObjectStore),
            ChunkState::WritingToObjectStore(chunk) => {
                (chunk.size(), ChunkStorage::ReadBufferAndObjectStore)
            }
            ChunkState::WrittenToObjectStore(chunk, _) => {
                (chunk.size(), ChunkStorage::ReadBufferAndObjectStore)
            }
        };

        ChunkSummary {
|
|
@ -1,24 +1,28 @@
|
|||
use arrow_deps::datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use internal_types::{schema::Schema, selection::Selection};
|
||||
use mutable_buffer::chunk::Chunk as MBChunk;
|
||||
use mutable_buffer::chunk::snapshot::ChunkSnapshot;
|
||||
use object_store::path::Path;
|
||||
use observability_deps::tracing::debug;
|
||||
use parquet_file::chunk::Chunk as ParquetChunk;
|
||||
use query::{exec::stringset::StringSet, predicate::Predicate, PartitionChunk};
|
||||
use read_buffer::Chunk as ReadBufferChunk;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use std::{collections::BTreeSet, sync::Arc};
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use super::{
|
||||
pred::{to_mutable_buffer_predicate, to_read_buffer_predicate},
|
||||
streams::{MutableBufferChunkStream, ReadFilterResultsStream},
|
||||
pred::to_read_buffer_predicate,
|
||||
streams::{MemoryStream, ReadFilterResultsStream},
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Mutable Buffer Chunk Error: {}", source))]
|
||||
MutableBufferChunk {
|
||||
source: mutable_buffer::chunk::Error,
|
||||
source: mutable_buffer::chunk::snapshot::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Read Buffer Error in chunk {}: {}", chunk_id, source))]
|
||||
|
@ -27,6 +31,15 @@ pub enum Error {
|
|||
chunk_id: u32,
|
||||
},
|
||||
|
||||
#[snafu(display("Read Buffer Error in chunk {}: {}", chunk_id, msg))]
|
||||
ReadBufferError { chunk_id: u32, msg: String },
|
||||
|
||||
#[snafu(display("Parquet File Error in chunk {}: {}", chunk_id, source))]
|
||||
ParquetFileChunkError {
|
||||
source: parquet_file::chunk::Error,
|
||||
chunk_id: u32,
|
||||
},
|
||||
|
||||
#[snafu(display("Internal error restricting schema: {}", source))]
|
||||
InternalSelectingSchema {
|
||||
source: internal_types::schema::Error,
|
||||
|
@ -58,10 +71,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
|
|||
#[derive(Debug)]
|
||||
pub enum DBChunk {
|
||||
MutableBuffer {
|
||||
chunk: Arc<MBChunk>,
|
||||
partition_key: Arc<String>,
|
||||
/// is this chunk open for writing?
|
||||
open: bool,
|
||||
chunk: Arc<ChunkSnapshot>,
|
||||
},
|
||||
ReadBuffer {
|
||||
chunk: Arc<ReadBufferChunk>,
|
||||
|
@ -83,36 +93,12 @@ impl DBChunk {
|
|||
ChunkState::Invalid => {
|
||||
panic!("Invalid internal state");
|
||||
}
|
||||
ChunkState::Open(chunk) => {
|
||||
// TODO the performance if cloning the chunk is terrible
|
||||
// Proper performance is tracked in
|
||||
// https://github.com/influxdata/influxdb_iox/issues/635
|
||||
let chunk = Arc::new(chunk.clone());
|
||||
Self::MutableBuffer {
|
||||
chunk,
|
||||
partition_key,
|
||||
open: true,
|
||||
}
|
||||
}
|
||||
ChunkState::Closing(chunk) => {
|
||||
// TODO the performance if cloning the chunk is terrible
|
||||
// Proper performance is tracked in
|
||||
// https://github.com/influxdata/influxdb_iox/issues/635
|
||||
let chunk = Arc::new(chunk.clone());
|
||||
Self::MutableBuffer {
|
||||
chunk,
|
||||
partition_key,
|
||||
open: false,
|
||||
}
|
||||
}
|
||||
ChunkState::Moving(chunk) => {
|
||||
let chunk = Arc::clone(chunk);
|
||||
Self::MutableBuffer {
|
||||
chunk,
|
||||
partition_key,
|
||||
open: false,
|
||||
}
|
||||
}
|
||||
ChunkState::Open(chunk) | ChunkState::Closing(chunk) => Self::MutableBuffer {
|
||||
chunk: chunk.snapshot(),
|
||||
},
|
||||
ChunkState::Moving(chunk) => Self::MutableBuffer {
|
||||
chunk: chunk.snapshot(),
|
||||
},
|
||||
ChunkState::Moved(chunk) => Self::ReadBuffer {
|
||||
chunk: Arc::clone(chunk),
|
||||
partition_key,
|
||||
|
@ -128,6 +114,14 @@ impl DBChunk {
|
|||
};
|
||||
Arc::new(db_chunk)
|
||||
}
|
||||
|
||||
/// Return object store paths
|
||||
pub fn object_store_paths(&self) -> Vec<Path> {
|
||||
match self {
|
||||
Self::ParquetFile { chunk } => chunk.all_paths(),
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartitionChunk for DBChunk {
|
||||
|
@ -135,15 +129,17 @@ impl PartitionChunk for DBChunk {
|
|||
|
||||
fn id(&self) -> u32 {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => chunk.id(),
|
||||
Self::MutableBuffer { chunk, .. } => chunk.chunk_id(),
|
||||
Self::ReadBuffer { chunk, .. } => chunk.id(),
|
||||
Self::ParquetFile { .. } => unimplemented!("parquet file not implemented"),
|
||||
Self::ParquetFile { chunk, .. } => chunk.id(),
|
||||
}
|
||||
}
|
||||
|
||||
fn all_table_names(&self, known_tables: &mut StringSet) {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => chunk.all_table_names(known_tables),
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
known_tables.extend(chunk.table_names(None).cloned())
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
// TODO - align APIs so they behave in the same way...
|
||||
let rb_names = chunk.all_table_names(known_tables);
|
||||
|
@ -151,42 +147,22 @@ impl PartitionChunk for DBChunk {
|
|||
known_tables.insert(name);
|
||||
}
|
||||
}
|
||||
Self::ParquetFile { .. } => unimplemented!("parquet file not implemented"),
|
||||
Self::ParquetFile { chunk, .. } => chunk.all_table_names(known_tables),
|
||||
}
|
||||
}
|
||||
|
||||
fn table_names(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
_known_tables: &StringSet,
|
||||
_known_tables: &StringSet, // TODO: Should this be being used?
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
let names = match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
if chunk.is_empty() {
|
||||
Some(StringSet::new())
|
||||
} else {
|
||||
let chunk_predicate = match to_mutable_buffer_predicate(chunk, predicate) {
|
||||
Ok(chunk_predicate) => chunk_predicate,
|
||||
Err(e) => {
|
||||
debug!(?predicate, %e, "mutable buffer predicate not supported for table_names, falling back");
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
// we don't support arbitrary expressions in chunk predicate yet
|
||||
if !chunk_predicate.chunk_exprs.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let names = chunk
|
||||
.table_names(&chunk_predicate)
|
||||
.context(MutableBufferChunk)?
|
||||
.into_iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<StringSet>();
|
||||
|
||||
Some(names)
|
||||
}
|
||||
if predicate.has_exprs() {
|
||||
// TODO: Support more predicates
|
||||
return Ok(None);
|
||||
}
|
||||
chunk.table_names(predicate.range).cloned().collect()
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
// If not supported, ReadBuffer can't answer with
|
||||
|
@ -199,26 +175,19 @@ impl PartitionChunk for DBChunk {
|
|||
}
|
||||
};
|
||||
|
||||
Some(chunk.table_names(&rb_predicate, &BTreeSet::new()))
|
||||
}
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented")
|
||||
chunk.table_names(&rb_predicate, &BTreeSet::new())
|
||||
}
|
||||
Self::ParquetFile { chunk, .. } => chunk.table_names(predicate.range).collect(),
|
||||
};
|
||||
|
||||
// Prune out tables that should not be
|
||||
// present (based on additional table restrictions of the Predicate)
|
||||
//
|
||||
// This is needed because at time of writing, the ReadBuffer's
|
||||
// table_names implementation doesn't include any way to
|
||||
// further restrict the tables to a known set of tables
|
||||
let names = names.map(|names| {
|
||||
Ok(Some(
|
||||
names
|
||||
.into_iter()
|
||||
.filter(|table_name| predicate.should_include_table(table_name))
|
||||
.collect()
|
||||
});
|
||||
Ok(names)
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
fn table_schema(
|
||||
|
@ -253,8 +222,12 @@ impl PartitionChunk for DBChunk {
|
|||
|
||||
Ok(schema)
|
||||
}
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented for table schema")
|
||||
Self::ParquetFile { chunk, .. } => {
|
||||
chunk
|
||||
.table_schema(table_name, selection)
|
||||
.context(ParquetFileChunkError {
|
||||
chunk_id: chunk.id(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -263,9 +236,7 @@ impl PartitionChunk for DBChunk {
|
|||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => chunk.has_table(table_name),
|
||||
Self::ReadBuffer { chunk, .. } => chunk.has_table(table_name),
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented for has_table")
|
||||
}
|
||||
Self::ParquetFile { chunk, .. } => chunk.has_table(table_name),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -277,22 +248,17 @@ impl PartitionChunk for DBChunk {
|
|||
) -> Result<SendableRecordBatchStream, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
// Note MutableBuffer doesn't support predicate
|
||||
// pushdown (other than pruning out the entire chunk
|
||||
// via `might_pass_predicate)
|
||||
if !predicate.is_empty() {
|
||||
return InternalPredicateNotSupported {
|
||||
predicate: predicate.clone(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
let schema: Schema = self.table_schema(table_name, selection)?;
|
||||
let batch = chunk
|
||||
.read_filter(table_name, selection)
|
||||
.context(MutableBufferChunk)?;
|
||||
|
||||
Ok(Box::pin(MutableBufferChunkStream::new(
|
||||
Arc::clone(&chunk),
|
||||
schema.as_arrow(),
|
||||
table_name,
|
||||
)))
|
||||
Ok(Box::pin(MemoryStream::new(batch)))
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
// Error converting to a rb_predicate needs to fail
|
||||
|
@ -354,17 +320,11 @@ impl PartitionChunk for DBChunk {
|
|||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
let chunk_predicate = match to_mutable_buffer_predicate(chunk, predicate) {
|
||||
Ok(chunk_predicate) => chunk_predicate,
|
||||
Err(e) => {
|
||||
debug!(?predicate, %e, "mutable buffer predicate not supported for column_names, falling back");
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
chunk
|
||||
.column_names(table_name, &chunk_predicate, columns)
|
||||
.context(MutableBufferChunk)
|
||||
if !predicate.is_empty() {
|
||||
// TODO: Support predicates
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(chunk.column_names(table_name, columns))
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
let rb_predicate = match to_read_buffer_predicate(&predicate) {
|
||||
|
@ -396,32 +356,47 @@ impl PartitionChunk for DBChunk {
|
|||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, Self::Error> {
|
||||
match self {
|
||||
Self::MutableBuffer { chunk, .. } => {
|
||||
use mutable_buffer::chunk::Error::UnsupportedColumnTypeForListingValues;
|
||||
|
||||
let chunk_predicate = match to_mutable_buffer_predicate(chunk, predicate) {
|
||||
Ok(chunk_predicate) => chunk_predicate,
|
||||
Self::MutableBuffer { .. } => {
|
||||
// There is no advantage to manually implementing this
|
||||
// vs just letting DataFusion do its thing
|
||||
Ok(None)
|
||||
}
|
||||
Self::ReadBuffer { chunk, .. } => {
|
||||
let rb_predicate = match to_read_buffer_predicate(predicate) {
|
||||
Ok(rb_predicate) => rb_predicate,
|
||||
Err(e) => {
|
||||
debug!(?predicate, %e, "mutable buffer predicate not supported for column_values, falling back");
|
||||
debug!(?predicate, %e, "read buffer predicate not supported for column_names, falling back");
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
let values = chunk.tag_column_values(table_name, column_name, &chunk_predicate);
|
||||
let mut values = chunk
|
||||
.column_values(
|
||||
table_name,
|
||||
rb_predicate,
|
||||
Selection::Some(&[column_name]),
|
||||
BTreeMap::new(),
|
||||
)
|
||||
.context(ReadBufferChunkError {
|
||||
chunk_id: chunk.id(),
|
||||
})?;
|
||||
|
||||
// if the mutable buffer doesn't support getting
|
||||
// values for this kind of column, report back None
|
||||
if let Err(UnsupportedColumnTypeForListingValues { .. }) = values {
|
||||
Ok(None)
|
||||
} else {
|
||||
values.context(MutableBufferChunk)
|
||||
}
|
||||
}
|
||||
Self::ReadBuffer { .. } => {
|
||||
// TODO hook up read buffer API here when ready. Until
|
||||
// now, fallback to using a full plan
|
||||
// https://github.com/influxdata/influxdb_iox/issues/857
|
||||
Ok(None)
|
||||
// The InfluxRPC frontend only supports getting column values
|
||||
// for one column at a time (this is a restriction on the Influx
|
||||
// Read gRPC API too). However, the Read Buffer support multiple
|
||||
// columns and will return a map - we just need to pull the
|
||||
// column out to get the set of values.
|
||||
let values = values
|
||||
.remove(column_name)
|
||||
.ok_or_else(|| Error::ReadBufferError {
|
||||
chunk_id: chunk.id(),
|
||||
msg: format!(
|
||||
"failed to find column_name {:?} in results of tag_values",
|
||||
column_name
|
||||
),
|
||||
})?;
|
||||
|
||||
Ok(Some(values))
|
||||
}
|
||||
Self::ParquetFile { .. } => {
|
||||
unimplemented!("parquet file not implemented for column_values")
|
||||
|
|
|
@ -20,6 +20,7 @@ pub struct LifecycleManager {
|
|||
db: Arc<Db>,
|
||||
db_name: String,
|
||||
move_task: Option<TaskTracker<Job>>,
|
||||
write_task: Option<TaskTracker<Job>>,
|
||||
}
|
||||
|
||||
impl LifecycleManager {
|
||||
|
@ -30,6 +31,7 @@ impl LifecycleManager {
|
|||
db,
|
||||
db_name,
|
||||
move_task: None,
|
||||
write_task: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,9 +67,15 @@ trait ChunkMover {
    /// Returns a boolean indicating if a move is in progress
    fn is_move_active(&self) -> bool;

    /// Returns a boolean indicating if a write is in progress
    fn is_write_active(&self) -> bool;

    /// Starts an operation to move a chunk to the read buffer
    fn move_to_read_buffer(&mut self, partition_key: String, chunk_id: u32);

    /// Starts an operation to write a chunk to the object store
    fn write_to_object_store(&mut self, partition_key: String, chunk_id: u32);

    /// Drops a chunk from the database
    fn drop_chunk(&mut self, partition_key: String, chunk_id: u32);
@ -78,10 +86,11 @@ trait ChunkMover {
|
|||
|
||||
let mut buffer_size = 0;
|
||||
|
||||
// Only want to start a new move task if there isn't one already in-flight
|
||||
// Only want to start a new move/write task if there isn't one already in-flight
|
||||
//
|
||||
// Note: This does not take into account manually triggered tasks
|
||||
let mut move_active = self.is_move_active();
|
||||
let mut write_active = self.is_write_active();
|
||||
|
||||
// Iterate through the chunks to determine
|
||||
// - total memory consumption
|
||||
|
@ -90,33 +99,44 @@ trait ChunkMover {
|
|||
// TODO: Track size globally to avoid iterating through all chunks (#1100)
|
||||
for chunk in &chunks {
|
||||
let chunk_guard = chunk.upgradable_read();
|
||||
|
||||
buffer_size += Self::chunk_size(&*chunk_guard);
|
||||
|
||||
if !move_active && can_move(&rules, &*chunk_guard, now) {
|
||||
match chunk_guard.state() {
|
||||
ChunkState::Open(_) => {
|
||||
let mut chunk_guard = RwLockUpgradableReadGuard::upgrade(chunk_guard);
|
||||
chunk_guard.set_closing().expect("cannot close open chunk");
|
||||
let would_move = !move_active && can_move(&rules, &*chunk_guard, now);
|
||||
let would_write = !write_active && rules.persist;
|
||||
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
match chunk_guard.state() {
|
||||
ChunkState::Open(_) if would_move => {
|
||||
let mut chunk_guard = RwLockUpgradableReadGuard::upgrade(chunk_guard);
|
||||
chunk_guard.set_closing().expect("cannot close open chunk");
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
ChunkState::Closing(_) => {
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
_ => {}
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
ChunkState::Closing(_) if would_move => {
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
move_active = true;
|
||||
self.move_to_read_buffer(partition_key, chunk_id);
|
||||
}
|
||||
ChunkState::Moved(_) if would_write => {
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
||||
std::mem::drop(chunk_guard);
|
||||
|
||||
write_active = true;
|
||||
self.write_to_object_store(partition_key, chunk_id);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// TODO: Find and recover cancelled move jobs (#1099)
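The loop above relies on parking_lot's upgradable read locks: each chunk is inspected under a shared (upgradable) guard and only upgraded to a write guard when a state transition is actually needed. A minimal sketch of that pattern in isolation, assuming only the parking_lot crate (the size counter and threshold are illustrative):

use parking_lot::{RwLock, RwLockUpgradableReadGuard};

fn close_if_over_limit(chunk_size: &RwLock<usize>, limit: usize) -> bool {
    // Take an upgradable read: other readers may proceed, but only one
    // upgradable or write guard can exist at a time.
    let guard = chunk_size.upgradable_read();
    if *guard > limit {
        // Upgrade to exclusive access only when a mutation is required.
        let mut write_guard = RwLockUpgradableReadGuard::upgrade(guard);
        *write_guard = 0; // e.g. "close" the chunk and reset its tracked size
        true
    } else {
        false
    }
}

fn main() {
    let size = RwLock::new(10usize);
    assert!(close_if_over_limit(&size, 5));
    assert!(!close_if_over_limit(&size, 5));
}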
|
||||
|
@ -129,8 +149,9 @@ trait ChunkMover {
|
|||
match chunks.next() {
|
||||
Some(chunk) => {
|
||||
let chunk_guard = chunk.read();
|
||||
if rules.drop_non_persisted
|
||||
|| matches!(chunk_guard.state(), ChunkState::Moved(_))
|
||||
if (rules.drop_non_persisted
|
||||
&& matches!(chunk_guard.state(), ChunkState::Moved(_)))
|
||||
|| matches!(chunk_guard.state(), ChunkState::WrittenToObjectStore(_, _))
|
||||
{
|
||||
let partition_key = chunk_guard.key().to_string();
|
||||
let chunk_id = chunk_guard.id();
|
||||
|
@ -169,6 +190,13 @@ impl ChunkMover for LifecycleManager {
.unwrap_or(false)
}
fn is_write_active(&self) -> bool {
self.write_task
.as_ref()
.map(|x| !x.is_complete())
.unwrap_or(false)
}
fn move_to_read_buffer(&mut self, partition_key: String, chunk_id: u32) {
info!(%partition_key, %chunk_id, "moving chunk to read buffer");
self.move_task = Some(
@ -177,6 +205,14 @@ impl ChunkMover for LifecycleManager {
)
}
fn write_to_object_store(&mut self, partition_key: String, chunk_id: u32) {
info!(%partition_key, %chunk_id, "write chunk to object store");
self.write_task = Some(
self.db
.write_chunk_to_object_store_in_background(partition_key, chunk_id),
)
}
fn drop_chunk(&mut self, partition_key: String, chunk_id: u32) {
info!(%partition_key, %chunk_id, "dropping chunk");
let _ = self
@ -251,9 +287,57 @@ mod tests {
chunk
}
/// Transitions a new ("open") chunk into the "moving" state.
fn transition_to_moving(mut chunk: Chunk) -> Chunk {
chunk.set_closing().unwrap();
chunk.set_moving().unwrap();
chunk
}
/// Transitions a new ("open") chunk into the "moved" state.
fn transition_to_moved(mut chunk: Chunk, rb: &Arc<read_buffer::Chunk>) -> Chunk {
chunk = transition_to_moving(chunk);
chunk.set_moved(Arc::clone(&rb)).unwrap();
chunk
}
/// Transitions a new ("open") chunk into the "writing to object store"
/// state.
fn transition_to_writing_to_object_store(
mut chunk: Chunk,
rb: &Arc<read_buffer::Chunk>,
) -> Chunk {
chunk = transition_to_moved(chunk, rb);
chunk.set_writing_to_object_store().unwrap();
chunk
}
/// Transitions a new ("open") chunk into the "written to object store"
/// state.
fn transition_to_written_to_object_store(
mut chunk: Chunk,
rb: &Arc<read_buffer::Chunk>,
) -> Chunk {
chunk = transition_to_writing_to_object_store(chunk, rb);
let parquet_chunk = new_parquet_chunk(&chunk);
chunk
.set_written_to_object_store(Arc::new(parquet_chunk))
.unwrap();
chunk
}
fn new_parquet_chunk(chunk: &Chunk) -> parquet_file::chunk::Chunk {
parquet_file::chunk::Chunk::new(
chunk.key().to_string(),
chunk.id(),
&tracker::MemRegistry::new(),
)
}
#[derive(Debug, Eq, PartialEq)]
enum MoverEvents {
Move(u32),
Write(u32),
Drop(u32),
}
@ -262,6 +346,7 @@ mod tests {
struct DummyMover {
rules: LifecycleRules,
move_active: bool,
write_active: bool,
chunks: Vec<Arc<RwLock<Chunk>>>,
events: Vec<MoverEvents>,
}
@ -275,6 +360,7 @@ mod tests {
.map(|x| Arc::new(RwLock::new(x)))
.collect(),
move_active: false,
write_active: false,
events: vec![],
}
}
@ -298,6 +384,10 @@ mod tests {
self.move_active
}
fn is_write_active(&self) -> bool {
self.write_active
}
fn move_to_read_buffer(&mut self, _: String, chunk_id: u32) {
let chunk = self
.chunks
@ -308,7 +398,22 @@ mod tests {
self.events.push(MoverEvents::Move(chunk_id))
}
fn write_to_object_store(&mut self, _partition_key: String, chunk_id: u32) {
let chunk = self
.chunks
.iter()
.find(|x| x.read().id() == chunk_id)
.unwrap();
chunk.write().set_writing_to_object_store().unwrap();
self.events.push(MoverEvents::Write(chunk_id))
}
fn drop_chunk(&mut self, _: String, chunk_id: u32) {
self.chunks = self
.chunks
.drain(..)
.filter(|x| x.read().id() != chunk_id)
.collect();
self.events.push(MoverEvents::Drop(chunk_id))
}
@ -467,7 +572,56 @@ mod tests {
}
#[test]
fn test_buffer_size_soft() {
fn test_buffer_size_soft_drop_non_persisted() {
// test that chunk mover only drops moved and written chunks
// IMPORTANT: the lifecycle rules use the default `persist` flag (false), so no
// "write" events will be triggered
let rules = LifecycleRules {
buffer_size_soft: Some(NonZeroUsize::new(5).unwrap()),
drop_non_persisted: true,
..Default::default()
};
let rb = Arc::new(read_buffer::Chunk::new_with_memory_tracker(
22,
&tracker::MemRegistry::new(),
));
let chunks = vec![new_chunk(0, Some(0), Some(0))];
let mut mover = DummyMover::new(rules.clone(), chunks);
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![]);
let chunks = vec![
// two "open" chunks => they must not be dropped (yet)
new_chunk(0, Some(0), Some(0)),
new_chunk(1, Some(0), Some(0)),
// "moved" chunk => can be dropped because `drop_non_persisted=true`
transition_to_moved(new_chunk(2, Some(0), Some(0)), &rb),
// "writing" chunk => cannot be dropped while the write is in progress
transition_to_writing_to_object_store(new_chunk(3, Some(0), Some(0)), &rb),
// "written" chunk => can be dropped
transition_to_written_to_object_store(new_chunk(4, Some(0), Some(0)), &rb),
];
let mut mover = DummyMover::new(rules, chunks);
mover.check_for_work(from_secs(10));
assert_eq!(
mover.events,
vec![MoverEvents::Drop(2), MoverEvents::Drop(4)]
);
}
#[test]
fn test_buffer_size_soft_dont_drop_non_persisted() {
// test that chunk mover only drops written chunks
// IMPORTANT: the lifecycle rules use the default `persist` flag (false), so no
// "write" events will be triggered
let rules = LifecycleRules {
buffer_size_soft: Some(NonZeroUsize::new(5).unwrap()),
..Default::default()
@ -485,21 +639,27 @@ mod tests {
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![]);
let mut chunks = vec![
let chunks = vec![
// two "open" chunks => they must not be dropped (yet)
new_chunk(0, Some(0), Some(0)),
new_chunk(1, Some(0), Some(0)),
new_chunk(2, Some(0), Some(0)),
// "moved" chunk => cannot be dropped because `drop_non_persisted=false`
transition_to_moved(new_chunk(2, Some(0), Some(0)), &rb),
// "writing" chunk => cannot be dropped while the write is in progress
transition_to_writing_to_object_store(new_chunk(3, Some(0), Some(0)), &rb),
// "written" chunk => can be dropped
transition_to_written_to_object_store(new_chunk(4, Some(0), Some(0)), &rb),
];
chunks[2].set_closing().unwrap();
chunks[2].set_moving().unwrap();
chunks[2].set_moved(Arc::clone(&rb)).unwrap();
let mut mover = DummyMover::new(rules, chunks);
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![MoverEvents::Drop(2)]);
assert_eq!(mover.events, vec![MoverEvents::Drop(4)]);
}
#[test]
fn test_buffer_size_soft_no_op() {
// check that we don't drop anything if there is nothing to drop
let rules = LifecycleRules {
buffer_size_soft: Some(NonZeroUsize::new(40).unwrap()),
..Default::default()
@ -512,4 +672,33 @@ mod tests {
mover.check_for_work(from_secs(10));
assert_eq!(mover.events, vec![]);
}
#[test]
fn test_persist() {
let rules = LifecycleRules {
mutable_linger_seconds: Some(NonZeroU32::new(10).unwrap()),
persist: true,
..Default::default()
};
let rb = Arc::new(read_buffer::Chunk::new_with_memory_tracker(
22,
&tracker::MemRegistry::new(),
));
let chunks = vec![
// still moving => cannot write
transition_to_moving(new_chunk(0, Some(0), Some(0))),
// moved => write to object store
transition_to_moved(new_chunk(1, Some(0), Some(0)), &rb),
// moved, but there will already be a write in progress (previous chunk) => don't write
transition_to_moved(new_chunk(2, Some(0), Some(0)), &rb),
];
let mut mover = DummyMover::new(rules, chunks);
mover.check_for_work(from_secs(0));
assert_eq!(mover.events, vec![MoverEvents::Write(1)]);
}
}
@ -3,7 +3,6 @@
use std::convert::TryFrom;
use mutable_buffer::{chunk::Chunk, pred::ChunkPredicate};
use query::predicate::Predicate;
use snafu::Snafu;
@ -11,15 +10,6 @@ use snafu::Snafu;
pub enum Error {
#[snafu(display("Error translating predicate: {}", msg))]
ReadBufferPredicate { msg: String, pred: Predicate },
#[snafu(display("Error building predicate for mutable buffer: {}", source))]
MutableBufferPredicate { source: mutable_buffer::pred::Error },
}
impl From<mutable_buffer::pred::Error> for Error {
fn from(source: mutable_buffer::pred::Error) -> Self {
Self::MutableBufferPredicate { source }
}
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -52,25 +42,6 @@ pub fn to_read_buffer_predicate(predicate: &Predicate) -> Result<read_buffer::Pr
}
}
/// Converts a [`query::Predicate`] into [`ChunkPredicate`],
/// suitable for evaluating on the MutableBuffer.
pub fn to_mutable_buffer_predicate(
chunk: impl AsRef<Chunk>,
predicate: &Predicate,
) -> Result<ChunkPredicate> {
let predicate = chunk
.as_ref()
.predicate_builder()?
.table_names(predicate.table_names.as_ref())?
.field_names(predicate.field_columns.as_ref())?
.range(predicate.range)?
// it would be nice to avoid cloning all the exprs here.
.exprs(predicate.exprs.clone())?
.build();
Ok(predicate)
}
#[cfg(test)]
pub mod test {
use super::*;
@ -196,7 +167,6 @@ pub mod test {
Error::ReadBufferPredicate { msg, pred: _ } => {
assert_eq!(msg, exp.to_owned());
}
_ => panic!("Unexpected error type"),
}
}
}
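The removed to_mutable_buffer_predicate above relies on a fallible builder: every clause (table_names, field_names, range, exprs) returns a Result, so `?` bails out on the first clause the mutable buffer cannot evaluate. A generic sketch of that pattern is below; the types and error strings are invented for illustration and are not the crate's predicate_builder API.

// Illustrative fallible-builder pattern: each clause may reject input it cannot
// represent, and `?` short-circuits on the first error.
#[derive(Default)]
struct ChunkPredicate {
    table_names: Option<Vec<String>>,
    range: Option<(i64, i64)>,
}

#[derive(Default)]
struct PredicateBuilder {
    inner: ChunkPredicate,
}

impl PredicateBuilder {
    fn table_names(mut self, names: Option<&Vec<String>>) -> Result<Self, String> {
        self.inner.table_names = names.cloned();
        Ok(self)
    }

    fn range(mut self, range: Option<(i64, i64)>) -> Result<Self, String> {
        if matches!(range, Some((start, end)) if start > end) {
            return Err("invalid time range".to_string());
        }
        self.inner.range = range;
        Ok(self)
    }

    fn build(self) -> ChunkPredicate {
        self.inner
    }
}

fn main() -> Result<(), String> {
    let predicate = PredicateBuilder::default()
        .table_names(Some(&vec!["cpu".to_string()]))?
        .range(Some((0, 100)))?
        .build();
    println!("tables: {:?}, range: {:?}", predicate.table_names, predicate.range);
    Ok(())
}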
@ -1,15 +1,9 @@
//! Adapter streams for different Chunk types that implement the interface
//! needed by DataFusion
use arrow_deps::{
arrow::{
datatypes::SchemaRef,
error::{ArrowError, Result as ArrowResult},
record_batch::RecordBatch,
},
arrow::{datatypes::SchemaRef, error::Result as ArrowResult, record_batch::RecordBatch},
datafusion::physical_plan::RecordBatchStream,
};
use internal_types::selection::Selection;
use mutable_buffer::chunk::Chunk as MBChunk;
use read_buffer::ReadFilterResults;
use std::{
@ -17,99 +11,6 @@ use std::{
task::{Context, Poll},
};
use snafu::{ResultExt, Snafu};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display(
"Error getting data for table '{}' chunk {}: {}",
table_name,
chunk_id,
source
))]
GettingTableData {
table_name: String,
chunk_id: u32,
source: mutable_buffer::chunk::Error,
},
}
/// Adapter which will produce record batches from a mutable buffer
/// chunk on demand
pub(crate) struct MutableBufferChunkStream {
/// Requested output schema (includes selection)
schema: SchemaRef,
chunk: Arc<MBChunk>,
table_name: Arc<String>,
/// Vector of record batches to send in reverse order (send data[len-1]
/// next) Is None until the first call to poll_next
data: Option<Vec<RecordBatch>>,
}
impl MutableBufferChunkStream {
pub fn new(chunk: Arc<MBChunk>, schema: SchemaRef, table_name: impl Into<String>) -> Self {
Self {
chunk,
schema,
table_name: Arc::new(table_name.into()),
data: None,
}
}
// gets the next batch, as needed
fn next_batch(&mut self) -> ArrowResult<Option<RecordBatch>> {
if self.data.is_none() {
// Want all the columns in the schema. Note we don't
// use `Selection::All` here because the mutable buffer chunk would interpret it
// as "all columns in the table in that chunk" rather than
// all columns this query needs
let selected_cols = self
.schema
.fields()
.iter()
.map(|f| f.name() as &str)
.collect::<Vec<_>>();
let selection = Selection::Some(&selected_cols);
let mut data = Vec::new();
self.chunk
.table_to_arrow(&mut data, self.table_name.as_ref(), selection)
.context(GettingTableData {
table_name: self.table_name.as_ref(),
chunk_id: self.chunk.id(),
})
.map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
// reverse the array so we can pop off the back
data.reverse();
self.data = Some(data);
}
// self.data was set to Some above
Ok(self.data.as_mut().unwrap().pop())
}
}
impl RecordBatchStream for MutableBufferChunkStream {
fn schema(&self) -> SchemaRef {
Arc::clone(&self.schema)
}
}
impl futures::Stream for MutableBufferChunkStream {
type Item = ArrowResult<RecordBatch>;
fn poll_next(
mut self: std::pin::Pin<&mut Self>,
_: &mut Context<'_>,
) -> Poll<Option<Self::Item>> {
Poll::Ready(self.next_batch().transpose())
}
// TODO is there a useful size_hint to pass?
}
/// Adapter which will take a ReadFilterResults and make it an async stream
pub struct ReadFilterResultsStream {
read_results: ReadFilterResults,
@ -143,3 +44,42 @@ impl futures::Stream for ReadFilterResultsStream {
// TODO is there a useful size_hint to pass?
}
/// A RecordBatchStream created from a single RecordBatch
///
/// Unfortunately datafusion's MemoryStream is crate-local
#[derive(Debug)]
pub(crate) struct MemoryStream {
schema: SchemaRef,
batch: Option<RecordBatch>,
}
impl MemoryStream {
pub fn new(batch: RecordBatch) -> Self {
Self {
schema: batch.schema(),
batch: Some(batch),
}
}
}
impl RecordBatchStream for MemoryStream {
fn schema(&self) -> SchemaRef {
Arc::clone(&self.schema)
}
}
impl futures::Stream for MemoryStream {
type Item = ArrowResult<RecordBatch>;
fn poll_next(
mut self: std::pin::Pin<&mut Self>,
_: &mut Context<'_>,
) -> Poll<Option<Self::Item>> {
Poll::Ready(self.batch.take().map(Ok))
}
fn size_hint(&self) -> (usize, Option<usize>) {
(1, Some(1))
}
}
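MemoryStream above turns one RecordBatch into a stream by storing it in an Option and take()-ing it on the first poll, so the second poll yields None and ends the stream. The same one-shot idiom works for any item type; a self-contained sketch using only the futures crate, with invented names, is shown here.

use futures::stream::StreamExt;
use std::pin::Pin;
use std::task::{Context, Poll};

// One-shot stream: yields the stored item once, then terminates.
struct OneShot<T> {
    item: Option<T>,
}

impl<T: Unpin> futures::Stream for OneShot<T> {
    type Item = T;

    fn poll_next(mut self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // take() leaves None behind, so the next poll reports end of stream.
        Poll::Ready(self.item.take())
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let n = self.item.is_some() as usize;
        (n, Some(n))
    }
}

fn main() {
    futures::executor::block_on(async {
        let mut stream = OneShot { item: Some(42) };
        assert_eq!(stream.next().await, Some(42));
        assert_eq!(stream.next().await, None);
    });
}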
|
@ -67,6 +67,7 @@
|
|||
clippy::clone_on_ref_ptr
|
||||
)]
|
||||
|
||||
use std::convert::TryInto;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
@ -83,11 +84,11 @@ use data_types::{
|
|||
};
|
||||
use influxdb_line_protocol::ParsedLine;
|
||||
use internal_types::{
|
||||
data::{lines_to_replicated_write, ReplicatedWrite},
|
||||
entry::{self, lines_to_sharded_entries, Entry},
|
||||
once::OnceNonZeroU32,
|
||||
};
|
||||
use object_store::{path::ObjectStorePath, ObjectStore, ObjectStoreApi};
|
||||
use query::{exec::Executor, Database, DatabaseStore};
|
||||
use query::{exec::Executor, DatabaseStore};
|
||||
use tracker::{TaskId, TaskRegistration, TaskRegistryWithHistory, TaskTracker, TrackedFutureExt};
|
||||
|
||||
use futures::{pin_mut, FutureExt};
|
||||
|
@ -98,15 +99,20 @@ use crate::{
|
|||
},
|
||||
db::Db,
|
||||
};
|
||||
use internal_types::entry::SequencedEntry;
|
||||
use std::num::NonZeroU32;
|
||||
|
||||
pub mod buffer;
|
||||
mod config;
|
||||
pub mod db;
|
||||
mod query_tests;
|
||||
pub mod snapshot;
|
||||
|
||||
#[cfg(test)]
|
||||
mod query_tests;
|
||||
// This module exposes `query_tests` outside of the crate so that it may be used
|
||||
// in benchmarks. Do not import this module for non-benchmark purposes!
|
||||
pub mod benchmarks {
|
||||
pub use crate::query_tests::*;
|
||||
}
|
||||
|
||||
type DatabaseError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
|
@ -147,6 +153,12 @@ pub enum Error {
|
|||
DatabaseAlreadyExists { db_name: String },
|
||||
#[snafu(display("error appending to wal buffer: {}", source))]
|
||||
WalError { source: buffer::Error },
|
||||
#[snafu(display("error converting line protocol to flatbuffers: {}", source))]
|
||||
LineConversion { source: entry::Error },
|
||||
#[snafu(display("error decoding entry flatbuffers: {}", source))]
|
||||
DecodingEntry {
|
||||
source: flatbuffers::InvalidFlatbuffer,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
@ -179,6 +191,38 @@ impl JobRegistry {
const STORE_ERROR_PAUSE_SECONDS: u64 = 100;
/// Used to configure a server instance
#[derive(Debug)]
pub struct ServerConfig {
// number of executor worker threads. If not specified, defaults
// to number of cores on the system.
num_worker_threads: Option<usize>,
/// The `ObjectStore` instance to use for persistence
object_store: Arc<ObjectStore>,
}
impl ServerConfig {
/// Create a new config using the specified store
pub fn new(object_store: Arc<ObjectStore>) -> Self {
Self {
num_worker_threads: None,
object_store,
}
}
/// Use `num` worker threads for running queries
pub fn with_num_worker_threads(mut self, num: usize) -> Self {
self.num_worker_threads = Some(num);
self
}
/// return a reference to the object store in this configuration
pub fn store(&self) -> Arc<ObjectStore> {
Arc::clone(&self.object_store)
}
}
/// `Server` is the container struct for how servers store data internally, as
/// well as how they communicate with other servers. Each server will have one
/// of these structs, which keeps track of all replication and query rules.
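ServerConfig is a small builder: start from a store, optionally override the worker-thread count, and hand the result to Server::new, which destructures the config and resolves the default thread count. The tests later in this diff build it exactly this way. The sketch below mirrors that flow with placeholder types; it is not the crate's ObjectStore or Server, and the fixed fallback stands in for the num_cpus::get() default used by the real code.

use std::sync::Arc;

#[derive(Debug)]
struct ObjectStore; // placeholder store

struct ServerConfig {
    num_worker_threads: Option<usize>,
    object_store: Arc<ObjectStore>,
}

impl ServerConfig {
    fn new(object_store: Arc<ObjectStore>) -> Self {
        Self { num_worker_threads: None, object_store }
    }

    fn with_num_worker_threads(mut self, num: usize) -> Self {
        self.num_worker_threads = Some(num);
        self
    }
}

struct Server {
    store: Arc<ObjectStore>,
    worker_threads: usize,
}

impl Server {
    fn new(config: ServerConfig) -> Self {
        let ServerConfig { num_worker_threads, object_store } = config;
        Self {
            store: object_store,
            // The real code defaults to the core count; a fixed fallback
            // keeps this sketch dependency free.
            worker_threads: num_worker_threads.unwrap_or(4),
        }
    }
}

fn main() {
    let config = ServerConfig::new(Arc::new(ObjectStore)).with_num_worker_threads(1);
    let server = Server::new(config);
    println!("workers: {}, store: {:?}", server.worker_threads, server.store);
}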
@ -188,7 +232,7 @@ pub struct Server<M: ConnectionManager> {
|
|||
config: Arc<Config>,
|
||||
connection_manager: Arc<M>,
|
||||
pub store: Arc<ObjectStore>,
|
||||
executor: Arc<Executor>,
|
||||
exec: Arc<Executor>,
|
||||
jobs: Arc<JobRegistry>,
|
||||
}
|
||||
|
||||
|
@ -205,15 +249,21 @@ impl<E> From<Error> for UpdateError<E> {
|
|||
}
|
||||
|
||||
impl<M: ConnectionManager> Server<M> {
|
||||
pub fn new(connection_manager: M, store: Arc<ObjectStore>) -> Self {
|
||||
pub fn new(connection_manager: M, config: ServerConfig) -> Self {
|
||||
let jobs = Arc::new(JobRegistry::new());
|
||||
|
||||
let ServerConfig {
|
||||
num_worker_threads,
|
||||
object_store,
|
||||
} = config;
|
||||
let num_worker_threads = num_worker_threads.unwrap_or_else(num_cpus::get);
|
||||
|
||||
Self {
|
||||
id: Default::default(),
|
||||
config: Arc::new(Config::new(Arc::clone(&jobs))),
|
||||
store,
|
||||
store: object_store,
|
||||
connection_manager: Arc::new(connection_manager),
|
||||
executor: Arc::new(Executor::new()),
|
||||
exec: Arc::new(Executor::new(num_worker_threads)),
|
||||
jobs,
|
||||
}
|
||||
}
|
||||
|
@ -232,12 +282,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
}
|
||||
|
||||
/// Tells the server the set of rules for a database.
|
||||
pub async fn create_database(
|
||||
&self,
|
||||
rules: DatabaseRules,
|
||||
server_id: NonZeroU32,
|
||||
object_store: Arc<ObjectStore>,
|
||||
) -> Result<()> {
|
||||
pub async fn create_database(&self, rules: DatabaseRules, server_id: NonZeroU32) -> Result<()> {
|
||||
// Return an error if this server hasn't yet been setup with an id
|
||||
self.require_id()?;
|
||||
let db_reservation = self.config.create_db(rules)?;
|
||||
|
@ -245,7 +290,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
self.persist_database_rules(db_reservation.rules().clone())
|
||||
.await?;
|
||||
|
||||
db_reservation.commit(server_id, object_store);
|
||||
db_reservation.commit(server_id, Arc::clone(&self.store), Arc::clone(&self.exec));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -300,6 +345,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
.map(|mut path| {
|
||||
let store = Arc::clone(&self.store);
|
||||
let config = Arc::clone(&self.config);
|
||||
let exec = Arc::clone(&self.exec);
|
||||
|
||||
path.set_file_name(DB_RULES_FILE_NAME);
|
||||
|
||||
|
@ -325,7 +371,7 @@ impl<M: ConnectionManager> Server<M> {
|
|||
}
|
||||
Ok(rules) => match config.create_db(rules) {
|
||||
Err(e) => error!("error adding database to config: {}", e),
|
||||
Ok(handle) => handle.commit(server_id, store),
|
||||
Ok(handle) => handle.commit(server_id, store, exec),
|
||||
},
|
||||
}
|
||||
})
|
||||
|
@ -337,12 +383,12 @@ impl<M: ConnectionManager> Server<M> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// `write_lines` takes in raw line protocol and converts it to a
|
||||
/// `ReplicatedWrite`, which is then replicated to other servers based
|
||||
/// on the configuration of the `db`. This is step #1 from the crate
|
||||
/// level documentation.
|
||||
/// `write_lines` takes in raw line protocol and converts it to a collection
|
||||
/// of ShardedEntry which are then sent to other IOx servers based on
|
||||
/// the ShardConfig or sent to the local database for buffering in the
|
||||
/// WriteBuffer and/or the MutableBuffer if configured.
|
||||
pub async fn write_lines(&self, db_name: &str, lines: &[ParsedLine<'_>]) -> Result<()> {
|
||||
let id = self.require_id()?.get();
|
||||
self.require_id()?;
|
||||
|
||||
let db_name = DatabaseName::new(db_name).context(InvalidDatabaseName)?;
|
||||
let db = self
|
||||
|
@ -350,62 +396,52 @@ impl<M: ConnectionManager> Server<M> {
|
|||
.db(&db_name)
|
||||
.context(DatabaseNotFound { db_name: &*db_name })?;
|
||||
|
||||
let sequence = db.next_sequence();
|
||||
let write = lines_to_replicated_write(id, sequence, lines, &*db.rules.read());
|
||||
let sharded_entries = lines_to_sharded_entries(
|
||||
lines,
|
||||
db.rules.read().shard_config.as_ref(),
|
||||
&*db.rules.read(),
|
||||
)
|
||||
.context(LineConversion)?;
|
||||
|
||||
self.handle_replicated_write(&db_name, &db, write).await?;
|
||||
for e in sharded_entries {
|
||||
// TODO: handle sending to shards based on ShardConfig
|
||||
self.handle_write_entry(&db, e.entry).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle_replicated_write(
|
||||
pub async fn write_entry(&self, db_name: &str, entry_bytes: Vec<u8>) -> Result<()> {
|
||||
self.require_id()?;
|
||||
|
||||
let db_name = DatabaseName::new(db_name).context(InvalidDatabaseName)?;
|
||||
let db = self
|
||||
.config
|
||||
.db(&db_name)
|
||||
.context(DatabaseNotFound { db_name: &*db_name })?;
|
||||
|
||||
let entry = entry_bytes.try_into().context(DecodingEntry)?;
|
||||
self.handle_write_entry(&db, entry).await
|
||||
}
|
||||
|
||||
pub async fn handle_write_entry(&self, db: &Db, entry: Entry) -> Result<()> {
|
||||
db.store_entry(entry)
|
||||
.map_err(|e| Error::UnknownDatabaseError {
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle_sequenced_entry(
|
||||
&self,
|
||||
db_name: &DatabaseName<'_>,
|
||||
db: &Db,
|
||||
write: ReplicatedWrite,
|
||||
sequenced_entry: SequencedEntry,
|
||||
) -> Result<()> {
|
||||
match db.store_replicated_write(&write) {
|
||||
Err(db::Error::DatabaseNotWriteable {}) | Ok(_) => {}
|
||||
Err(e) => {
|
||||
return Err(Error::UnknownDatabaseError {
|
||||
source: Box::new(e),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
let write = Arc::new(write);
|
||||
|
||||
if let Some(wal_buffer) = &db.wal_buffer {
|
||||
let persist;
|
||||
let segment = {
|
||||
let mut wal_buffer = wal_buffer.lock();
|
||||
persist = wal_buffer.persist;
|
||||
|
||||
// TODO: address this issue?
|
||||
// the mutable buffer and the wal buffer have different locking mechanisms,
|
||||
// which means that it's possible for a mutable buffer write to
|
||||
// succeed while a WAL buffer write fails, which would then
|
||||
// return an error. A single lock is probably undesirable, but
|
||||
// we need to figure out what semantics we want.
|
||||
wal_buffer.append(Arc::clone(&write)).context(WalError)?
|
||||
};
|
||||
|
||||
if let Some(segment) = segment {
|
||||
if persist {
|
||||
let writer_id = self.require_id()?.get();
|
||||
let store = Arc::clone(&self.store);
|
||||
|
||||
let (_, tracker) = self.jobs.register(Job::PersistSegment {
|
||||
writer_id,
|
||||
segment_id: segment.id,
|
||||
});
|
||||
|
||||
segment
|
||||
.persist_bytes_in_background(tracker, writer_id, db_name, store)
|
||||
.context(WalError)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
db.store_sequenced_entry(sequenced_entry)
|
||||
.map_err(|e| Error::UnknownDatabaseError {
|
||||
source: Box::new(e),
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -574,12 +610,8 @@ where
|
|||
let db = match self.db(&db_name) {
|
||||
Some(db) => db,
|
||||
None => {
|
||||
self.create_database(
|
||||
DatabaseRules::new(db_name.clone()),
|
||||
self.require_id()?,
|
||||
Arc::clone(&self.store),
|
||||
)
|
||||
.await?;
|
||||
self.create_database(DatabaseRules::new(db_name.clone()), self.require_id()?)
|
||||
.await?;
|
||||
self.db(&db_name).expect("db not inserted")
|
||||
}
|
||||
};
|
||||
|
@ -587,8 +619,9 @@ where
|
|||
Ok(db)
|
||||
}
|
||||
|
||||
/// Return a handle to the query executor
|
||||
fn executor(&self) -> Arc<Executor> {
|
||||
Arc::clone(&self.executor)
|
||||
Arc::clone(&self.exec)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -610,12 +643,17 @@ pub trait ConnectionManager {
|
|||
pub trait RemoteServer {
|
||||
type Error: std::error::Error + Send + Sync + 'static;
|
||||
|
||||
/// Sends a replicated write to a remote server. This is step #2 from the
|
||||
/// diagram.
|
||||
async fn replicate(
|
||||
/// Sends an Entry to the remote server. An IOx server acting as a
|
||||
/// router/sharder will call this method to send entries to remotes.
|
||||
async fn write_entry(&self, db: &str, entry: Entry) -> Result<(), Self::Error>;
|
||||
|
||||
/// Sends a SequencedEntry to the remote server. An IOx server acting as a
|
||||
/// write buffer will call this method to replicate to other write
|
||||
/// buffer servers or to send data to downstream subscribers.
|
||||
async fn write_sequenced_entry(
|
||||
&self,
|
||||
db: &str,
|
||||
replicated_write: &ReplicatedWrite,
|
||||
sequenced_entry: SequencedEntry,
|
||||
) -> Result<(), Self::Error>;
|
||||
}
|
||||
|
||||
|
@ -643,10 +681,19 @@ pub struct RemoteServerImpl {}
|
|||
impl RemoteServer for RemoteServerImpl {
|
||||
type Error = Error;
|
||||
|
||||
async fn replicate(
|
||||
/// Sends an Entry to the remote server. An IOx server acting as a
|
||||
/// router/sharder will call this method to send entries to remotes.
|
||||
async fn write_entry(&self, _db: &str, _entry: Entry) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Sends a SequencedEntry to the remote server. An IOx server acting as a
|
||||
/// write buffer will call this method to replicate to other write
|
||||
/// buffer servers or to send data to downstream subscribers.
|
||||
async fn write_sequenced_entry(
|
||||
&self,
|
||||
_db: &str,
|
||||
_replicated_write: &ReplicatedWrite,
|
||||
_sequenced_entry: SequencedEntry,
|
||||
) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
@ -675,28 +722,27 @@ mod tests {
|
|||
|
||||
use async_trait::async_trait;
|
||||
use futures::TryStreamExt;
|
||||
use parking_lot::Mutex;
|
||||
use snafu::Snafu;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use arrow_deps::{assert_table_eq, datafusion::physical_plan::collect};
|
||||
use data_types::database_rules::{
|
||||
PartitionTemplate, TemplatePart, WalBufferConfig, WalBufferRollover,
|
||||
};
|
||||
use arrow_deps::assert_table_eq;
|
||||
use data_types::database_rules::{PartitionTemplate, TemplatePart, NO_SHARD_CONFIG};
|
||||
use influxdb_line_protocol::parse_lines;
|
||||
use object_store::{memory::InMemory, path::ObjectStorePath};
|
||||
use query::{frontend::sql::SQLQueryPlanner, Database};
|
||||
|
||||
use crate::buffer::Segment;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn config() -> ServerConfig {
|
||||
ServerConfig::new(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
|
||||
.with_num_worker_threads(1)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn server_api_calls_return_error_with_no_id_set() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
|
||||
let resp = server.require_id().unwrap_err();
|
||||
assert!(matches!(resp, Error::IdNotSet));
|
||||
|
@ -709,8 +755,9 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn create_database_persists_rules() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, Arc::clone(&store));
|
||||
let config = config();
|
||||
let store = config.store();
|
||||
let server = Server::new(manager, config);
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("bananas").unwrap();
|
||||
|
@ -727,11 +774,7 @@ mod tests {
|
|||
|
||||
// Create a database
|
||||
server
|
||||
.create_database(
|
||||
rules.clone(),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.create_database(rules.clone(), server.require_id().unwrap())
|
||||
.await
|
||||
.expect("failed to create database");
|
||||
|
||||
|
@ -759,7 +802,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(db2.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.expect("failed to create 2nd db");
|
||||
|
@ -767,7 +809,8 @@ mod tests {
|
|||
store.list_with_delimiter(&store.new_path()).await.unwrap();
|
||||
|
||||
let manager = TestConnectionManager::new();
|
||||
let server2 = Server::new(manager, store);
|
||||
let config2 = ServerConfig::new(store).with_num_worker_threads(1);
|
||||
let server2 = Server::new(manager, config2);
|
||||
server2.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
server2.load_database_configs().await.unwrap();
|
||||
|
||||
|
@ -780,8 +823,7 @@ mod tests {
|
|||
// Covers #643
|
||||
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("bananas").unwrap();
|
||||
|
@ -791,7 +833,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(name.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.expect("failed to create database");
|
||||
|
@ -801,7 +842,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(name.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
@ -814,8 +854,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn db_names_sorted() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let names = vec!["bar", "baz"];
|
||||
|
@ -823,11 +862,7 @@ mod tests {
|
|||
for name in &names {
|
||||
let name = DatabaseName::new(name.to_string()).unwrap();
|
||||
server
|
||||
.create_database(
|
||||
DatabaseRules::new(name),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.create_database(DatabaseRules::new(name), server.require_id().unwrap())
|
||||
.await
|
||||
.expect("failed to create database");
|
||||
}
|
||||
|
@ -839,17 +874,12 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn writes_local() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Server::new(manager, store);
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("foo".to_string()).unwrap();
|
||||
server
|
||||
.create_database(
|
||||
DatabaseRules::new(name),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.create_database(DatabaseRules::new(name), server.require_id().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -864,10 +894,52 @@ mod tests {
|
|||
let executor = server.executor();
|
||||
let physical_plan = planner
|
||||
.query(db, "select * from cpu", executor.as_ref())
|
||||
.unwrap();
|
||||
|
||||
let batches = executor.collect(physical_plan).await.unwrap();
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
"+-----+------+",
|
||||
"| 1 | 10 |",
|
||||
"+-----+------+",
|
||||
];
|
||||
assert_table_eq!(expected, &batches);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_entry_local() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let server = Server::new(manager, config());
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
|
||||
let name = DatabaseName::new("foo".to_string()).unwrap();
|
||||
server
|
||||
.create_database(DatabaseRules::new(name), server.require_id().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batches = collect(physical_plan).await.unwrap();
|
||||
let db_name = DatabaseName::new("foo").unwrap();
|
||||
let db = server.db(&db_name).unwrap();
|
||||
|
||||
let line = "cpu bar=1 10";
|
||||
let lines: Vec<_> = parse_lines(line).map(|l| l.unwrap()).collect();
|
||||
let sharded_entries = lines_to_sharded_entries(&lines, NO_SHARD_CONFIG, &*db.rules.read())
|
||||
.expect("sharded entries");
|
||||
|
||||
let entry = &sharded_entries[0].entry;
|
||||
server
|
||||
.write_entry("foo", entry.data().into())
|
||||
.await
|
||||
.expect("write entry");
|
||||
|
||||
let planner = SQLQueryPlanner::default();
|
||||
let executor = server.executor();
|
||||
let physical_plan = planner
|
||||
.query(db, "select * from cpu", executor.as_ref())
|
||||
.unwrap();
|
||||
|
||||
let batches = executor.collect(physical_plan).await.unwrap();
|
||||
let expected = vec![
|
||||
"+-----+------+",
|
||||
"| bar | time |",
|
||||
|
@ -882,8 +954,7 @@ mod tests {
|
|||
async fn close_chunk() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Arc::new(Server::new(manager, store));
|
||||
let server = Arc::new(Server::new(manager, config()));
|
||||
|
||||
let cancel_token = CancellationToken::new();
|
||||
let background_handle = spawn_worker(Arc::clone(&server), cancel_token.clone());
|
||||
|
@ -895,7 +966,6 @@ mod tests {
|
|||
.create_database(
|
||||
DatabaseRules::new(db_name.clone()),
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
@ -945,71 +1015,10 @@ mod tests {
|
|||
let _ = background_handle.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn segment_persisted_on_rollover() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
|
||||
let server = Server::new(manager, Arc::clone(&store));
|
||||
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
|
||||
let db_name = DatabaseName::new("my_db").unwrap();
|
||||
let rules = DatabaseRules {
|
||||
name: db_name.clone(),
|
||||
partition_template: Default::default(),
|
||||
wal_buffer_config: Some(WalBufferConfig {
|
||||
buffer_size: 500,
|
||||
segment_size: 10,
|
||||
buffer_rollover: WalBufferRollover::ReturnError,
|
||||
store_segments: true,
|
||||
close_segment_after: None,
|
||||
}),
|
||||
lifecycle_rules: Default::default(),
|
||||
shard_config: None,
|
||||
};
|
||||
server
|
||||
.create_database(
|
||||
rules,
|
||||
server.require_id().unwrap(),
|
||||
Arc::clone(&server.store),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let lines = parsed_lines("disk,host=a used=10.1 12");
|
||||
server.write_lines(db_name.as_str(), &lines).await.unwrap();
|
||||
|
||||
// write lines should have caused a segment rollover and persist, wait
|
||||
tokio::task::yield_now().await;
|
||||
|
||||
let mut path = store.new_path();
|
||||
path.push_all_dirs(&["1", "my_db", "wal", "000", "000"]);
|
||||
path.set_file_name("001.segment");
|
||||
|
||||
let data = store
|
||||
.get(&path)
|
||||
.await
|
||||
.unwrap()
|
||||
.map_ok(|b| bytes::BytesMut::from(&b[..]))
|
||||
.try_concat()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let segment = Segment::from_file_bytes(&data).unwrap();
|
||||
assert_eq!(segment.writes.len(), 1);
|
||||
let write = r#"
|
||||
writer:1, sequence:1, checksum:2741956553
|
||||
partition_key:
|
||||
table:disk
|
||||
host:a used:10.1 time:12
|
||||
"#;
|
||||
assert_eq!(segment.writes[0].to_string(), write);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn background_task_cleans_jobs() {
|
||||
let manager = TestConnectionManager::new();
|
||||
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
|
||||
let server = Arc::new(Server::new(manager, store));
|
||||
let server = Arc::new(Server::new(manager, config()));
|
||||
|
||||
let cancel_token = CancellationToken::new();
|
||||
let background_handle = spawn_worker(Arc::clone(&server), cancel_token.clone());
|
||||
|
@ -1057,24 +1066,22 @@ partition_key:
|
|||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct TestRemoteServer {
|
||||
writes: Mutex<BTreeMap<String, Vec<ReplicatedWrite>>>,
|
||||
}
|
||||
struct TestRemoteServer {}
|
||||
|
||||
#[async_trait]
|
||||
impl RemoteServer for TestRemoteServer {
|
||||
type Error = TestClusterError;
|
||||
|
||||
async fn replicate(
|
||||
&self,
|
||||
db: &str,
|
||||
replicated_write: &ReplicatedWrite,
|
||||
) -> Result<(), Self::Error> {
|
||||
let mut writes = self.writes.lock();
|
||||
let entries = writes.entry(db.to_string()).or_insert_with(Vec::new);
|
||||
entries.push(replicated_write.clone());
|
||||
async fn write_entry(&self, _db: &str, _entry: Entry) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
Ok(())
|
||||
async fn write_sequenced_entry(
|
||||
&self,
|
||||
_db: &str,
|
||||
_sequenced_entry: SequencedEntry,
|
||||
) -> Result<(), Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#![allow(unused_imports, dead_code, unused_macros)]
|
||||
pub mod field_columns;
|
||||
pub mod read_filter;
|
||||
pub mod read_group;
|
||||
|
|
|
@ -4,10 +4,7 @@ use arrow_deps::{
|
|||
datafusion::logical_plan::{col, lit},
|
||||
};
|
||||
use query::{
|
||||
exec::{
|
||||
fieldlist::{Field, FieldList},
|
||||
Executor,
|
||||
},
|
||||
exec::fieldlist::{Field, FieldList},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
@ -31,11 +28,10 @@ macro_rules! run_field_columns_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
let executor = db.executor();
|
||||
|
||||
let plan = planner
|
||||
.field_columns(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let fields = executor
|
||||
.to_field_list(plan)
|
||||
|
@ -133,11 +129,9 @@ async fn test_field_name_plan() {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.field_columns(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let mut plans = plan.plans;
|
||||
|
@ -146,7 +140,8 @@ async fn test_field_name_plan() {
|
|||
|
||||
// run the created plan directly, ensuring the output is as
|
||||
// expected (specifically that the column ordering is correct)
|
||||
let results = executor
|
||||
let results = db
|
||||
.executor()
|
||||
.run_logical_plan(plan)
|
||||
.await
|
||||
.expect("ok running plan");
|
||||
|
|
|
@ -4,11 +4,11 @@ use crate::query_tests::scenarios::*;
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use async_trait::async_trait;
|
||||
use query::{
|
||||
exec::Executor,
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::{Predicate, PredicateBuilder, EMPTY_PREDICATE},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsMultiSeries {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsMultiSeries {
|
||||
|
@ -46,14 +46,12 @@ macro_rules! run_read_filter_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.read_filter(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let string_results = run_series_set_plan(executor, plan).await;
|
||||
let string_results = run_series_set_plan(db.executor(), plan).await;
|
||||
|
||||
assert_eq!(
|
||||
expected_results, string_results,
|
||||
|
@ -310,6 +308,7 @@ async fn test_read_filter_data_pred_unsupported_in_scan() {
|
|||
run_read_filter_test_case!(TwoMeasurementsMultiSeries {}, predicate, expected_results);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MeasurementsSortableTags {}
|
||||
#[async_trait]
|
||||
impl DBSetup for MeasurementsSortableTags {
|
||||
|
|
|
@ -4,7 +4,6 @@ use crate::query_tests::scenarios::*;
|
|||
use arrow_deps::{arrow::util::pretty::pretty_format_batches, datafusion::prelude::*};
|
||||
use async_trait::async_trait;
|
||||
use query::{
|
||||
exec::Executor,
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
group_by::Aggregate,
|
||||
predicate::{Predicate, PredicateBuilder},
|
||||
|
@ -26,11 +25,9 @@ macro_rules! run_read_group_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plans = planner
|
||||
.read_group(&db, predicate.clone(), agg, &group_columns)
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let plans = plans.into_inner();
|
||||
|
@ -46,7 +43,8 @@ macro_rules! run_read_group_test_case {
|
|||
|
||||
let mut string_results = vec![];
|
||||
for plan in plans.into_iter() {
|
||||
let batches = executor
|
||||
let batches = db
|
||||
.executor()
|
||||
.run_logical_plan(plan.plan)
|
||||
.await
|
||||
.expect("ok running plan");
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
//! Tests for the Influx gRPC queries
|
||||
use crate::query_tests::{scenarios::*, utils::make_db};
|
||||
use crate::{
|
||||
db::test_helpers::write_lp,
|
||||
query_tests::{scenarios::*, utils::make_db},
|
||||
};
|
||||
|
||||
use arrow_deps::{arrow::util::pretty::pretty_format_batches, datafusion::prelude::*};
|
||||
use async_trait::async_trait;
|
||||
use query::{
|
||||
exec::Executor,
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
group_by::{Aggregate, WindowDuration},
|
||||
predicate::{Predicate, PredicateBuilder},
|
||||
test::TestLPWriter,
|
||||
};
|
||||
|
||||
/// runs read_window_aggregate(predicate) and compares it to the expected
|
||||
|
@ -28,18 +29,17 @@ macro_rules! run_read_window_aggregate_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plans = planner
|
||||
.read_window_aggregate(&db, predicate.clone(), agg, every.clone(), offset.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
|
||||
let plans = plans.into_inner();
|
||||
|
||||
let mut string_results = vec![];
|
||||
for plan in plans.into_iter() {
|
||||
let batches = executor
|
||||
let batches = db
|
||||
.executor()
|
||||
.run_logical_plan(plan.plan)
|
||||
.await
|
||||
.expect("ok running plan");
|
||||
|
@ -162,18 +162,16 @@ impl DBSetup for MeasurementForWindowAggregateMonths {
|
|||
// "2020-04-02T00"]
|
||||
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let data = lp_lines.join("\n");
|
||||
writer.write_lp_string(&db, &data).unwrap();
|
||||
write_lp(&db, &data);
|
||||
let scenario1 = DBScenario {
|
||||
scenario_name: "Data in 4 partitions, open chunks of mutable buffer".into(),
|
||||
db,
|
||||
};
|
||||
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let data = lp_lines.join("\n");
|
||||
writer.write_lp_string(&db, &data).unwrap();
|
||||
write_lp(&db, &data);
|
||||
db.rollover_partition("2020-03-01T00").await.unwrap();
|
||||
db.rollover_partition("2020-03-02T00").await.unwrap();
|
||||
let scenario2 = DBScenario {
|
||||
|
@ -184,9 +182,8 @@ impl DBSetup for MeasurementForWindowAggregateMonths {
|
|||
};
|
||||
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
let data = lp_lines.join("\n");
|
||||
writer.write_lp_string(&db, &data).unwrap();
|
||||
write_lp(&db, &data);
|
||||
rollover_and_load(&db, "2020-03-01T00").await;
|
||||
rollover_and_load(&db, "2020-03-02T00").await;
|
||||
rollover_and_load(&db, "2020-04-01T00").await;
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
//! Tests for the Influx gRPC queries
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::{Predicate, PredicateBuilder, EMPTY_PREDICATE},
|
||||
};
|
||||
|
@ -23,13 +20,12 @@ macro_rules! run_table_names_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.table_names(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
let names = db
|
||||
.executor()
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
@ -27,13 +24,12 @@ macro_rules! run_tag_keys_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.tag_keys(&db, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
let names = db
|
||||
.executor()
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
use arrow_deps::datafusion::logical_plan::{col, lit};
|
||||
use query::{
|
||||
exec::{
|
||||
stringset::{IntoStringSet, StringSetRef},
|
||||
Executor,
|
||||
},
|
||||
exec::stringset::{IntoStringSet, StringSetRef},
|
||||
frontend::influxrpc::InfluxRPCPlanner,
|
||||
predicate::PredicateBuilder,
|
||||
};
|
||||
|
@ -25,13 +22,12 @@ macro_rules! run_tag_values_test_case {
|
|||
println!("Running scenario '{}'", scenario_name);
|
||||
println!("Predicate: '{:#?}'", predicate);
|
||||
let planner = InfluxRPCPlanner::new();
|
||||
let executor = Executor::new();
|
||||
|
||||
let plan = planner
|
||||
.tag_values(&db, &tag_name, predicate.clone())
|
||||
.await
|
||||
.expect("built plan successfully");
|
||||
let names = executor
|
||||
let names = db
|
||||
.executor()
|
||||
.to_string_set(plan)
|
||||
.await
|
||||
.expect("converted plan to strings successfully");
|
||||
|
@ -239,7 +235,7 @@ async fn list_tag_values_field_col() {
|
|||
|
||||
// Test: temp is a field, not a tag
|
||||
let tag_name = "temp";
|
||||
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone()).await;
|
||||
let plan_result = planner.tag_values(&db, &tag_name, predicate.clone());
|
||||
|
||||
assert_eq!(
|
||||
plan_result.unwrap_err().to_string(),
|
||||
|
|
|
@ -51,7 +51,7 @@ pub fn dump_series_set(s: SeriesSet) -> Vec<String> {
|
|||
}
|
||||
|
||||
/// Run a series set plan to completion and produce a Vec<String> representation
|
||||
pub async fn run_series_set_plan(executor: Executor, plans: SeriesSetPlans) -> Vec<String> {
|
||||
pub async fn run_series_set_plan(executor: Arc<Executor>, plans: SeriesSetPlans) -> Vec<String> {
|
||||
// Use a channel sufficiently large to buffer the series
|
||||
let (tx, mut rx) = mpsc::channel(100);
|
||||
executor
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
//! This module contains testing scenarios for Db
|
||||
|
||||
use query::{test::TestLPWriter, PartitionChunk};
|
||||
#[allow(unused_imports, dead_code, unused_macros)]
|
||||
use query::PartitionChunk;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::db::Db;
|
||||
use crate::db::{test_helpers::write_lp, Db};
|
||||
|
||||
use super::utils::{count_mutable_buffer_chunks, count_read_buffer_chunks, make_db};
|
||||
|
||||
/// Holds a database and a description of how its data was configured
|
||||
#[derive(Debug)]
|
||||
pub struct DBScenario {
|
||||
pub scenario_name: String,
|
||||
pub db: Db,
|
||||
|
@ -22,6 +24,7 @@ pub trait DBSetup {
|
|||
}
|
||||
|
||||
/// No data
|
||||
#[derive(Debug)]
|
||||
pub struct NoData {}
|
||||
#[async_trait]
|
||||
impl DBSetup for NoData {
|
||||
|
@ -47,8 +50,7 @@ impl DBSetup for NoData {
|
|||
|
||||
let db = make_db();
|
||||
let data = "cpu,region=west user=23.2 100";
|
||||
let mut writer = TestLPWriter::default();
|
||||
writer.write_lp_string(&db, data).unwrap();
|
||||
write_lp(&db, data);
|
||||
// move data out of open chunk
|
||||
assert_eq!(db.rollover_partition(partition_key).await.unwrap().id(), 0);
|
||||
|
||||
|
@ -77,6 +79,7 @@ impl DBSetup for NoData {
|
|||
}
|
||||
|
||||
/// Two measurements data in a single mutable buffer chunk
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurements {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurements {
|
||||
|
@ -92,6 +95,7 @@ impl DBSetup for TwoMeasurements {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsUnsignedType {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsUnsignedType {
|
||||
|
@ -110,6 +114,7 @@ impl DBSetup for TwoMeasurementsUnsignedType {
|
|||
|
||||
/// Single measurement that has several different chunks with
|
||||
/// different (but compatible) schema
|
||||
#[derive(Debug)]
|
||||
pub struct MultiChunkSchemaMerge {}
|
||||
#[async_trait]
|
||||
impl DBSetup for MultiChunkSchemaMerge {
|
||||
|
@ -129,6 +134,7 @@ impl DBSetup for MultiChunkSchemaMerge {
|
|||
}
|
||||
|
||||
/// Two measurements data with many null values
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsManyNulls {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsManyNulls {
|
||||
|
@ -150,6 +156,7 @@ impl DBSetup for TwoMeasurementsManyNulls {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsManyFields {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsManyFields {
|
||||
|
@ -169,12 +176,12 @@ impl DBSetup for TwoMeasurementsManyFields {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TwoMeasurementsManyFieldsOneChunk {}
|
||||
#[async_trait]
|
||||
impl DBSetup for TwoMeasurementsManyFieldsOneChunk {
|
||||
async fn make(&self) -> Vec<DBScenario> {
|
||||
let db = make_db();
|
||||
let mut writer = TestLPWriter::default();
|
||||
|
||||
let lp_lines = vec![
|
||||
"h2o,state=MA,city=Boston temp=70.4 50",
|
||||
|
@ -184,7 +191,7 @@ impl DBSetup for TwoMeasurementsManyFieldsOneChunk {
|
|||
"o2,state=CA temp=79.0 300",
|
||||
];
|
||||
|
||||
writer.write_lp_string(&db, &lp_lines.join("\n")).unwrap();
|
||||
write_lp(&db, &lp_lines.join("\n"));
vec![DBScenario {
scenario_name: "Data in open chunk of mutable buffer".into(),
db,
@@ -192,6 +199,7 @@ impl DBSetup for TwoMeasurementsManyFieldsOneChunk {
}
}

#[derive(Debug)]
pub struct OneMeasurementManyFields {}
#[async_trait]
impl DBSetup for OneMeasurementManyFields {
@@ -212,6 +220,7 @@ impl DBSetup for OneMeasurementManyFields {
}

/// This data (from end to end test)
#[derive(Debug)]
pub struct EndToEndTest {}
#[async_trait]
impl DBSetup for EndToEndTest {
@@ -231,9 +240,7 @@ impl DBSetup for EndToEndTest {
let lp_data = lp_lines.join("\n");

let db = make_db();
let mut writer = TestLPWriter::default();
let res = writer.write_lp_string(&db, &lp_data);
assert!(res.is_ok(), "Error: {}", res.unwrap_err());
write_lp(&db, &lp_data);

let scenario1 = DBScenario {
scenario_name: "Data in open chunk of mutable buffer".into(),
@@ -251,16 +258,14 @@ impl DBSetup for EndToEndTest {
/// Data in one only read buffer chunk
pub(crate) async fn make_one_chunk_scenarios(partition_key: &str, data: &str) -> Vec<DBScenario> {
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data).unwrap();
write_lp(&db, data);
let scenario1 = DBScenario {
scenario_name: "Data in open chunk of mutable buffer".into(),
db,
};

let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data).unwrap();
write_lp(&db, data);
db.rollover_partition(partition_key).await.unwrap();
let scenario2 = DBScenario {
scenario_name: "Data in closed chunk of mutable buffer".into(),
@@ -268,8 +273,7 @@ pub(crate) async fn make_one_chunk_scenarios(partition_key: &str, data: &str) ->
};

let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data).unwrap();
write_lp(&db, data);
db.rollover_partition(partition_key).await.unwrap();
db.load_chunk_to_read_buffer(partition_key, 0)
.await
@@ -294,9 +298,8 @@ pub async fn make_two_chunk_scenarios(
data2: &str,
) -> Vec<DBScenario> {
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data1);
write_lp(&db, data2);
let scenario1 = DBScenario {
scenario_name: "Data in single open chunk of mutable buffer".into(),
db,
@@ -304,10 +307,9 @@ pub async fn make_two_chunk_scenarios(

// spread across 2 mutable buffer chunks
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
write_lp(&db, data1);
db.rollover_partition(partition_key).await.unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data2);
let scenario2 = DBScenario {
scenario_name: "Data in one open chunk and one closed chunk of mutable buffer".into(),
db,
@@ -315,13 +317,12 @@ pub async fn make_two_chunk_scenarios(

// spread across 1 mutable buffer, 1 read buffer chunks
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
write_lp(&db, data1);
db.rollover_partition(partition_key).await.unwrap();
db.load_chunk_to_read_buffer(partition_key, 0)
.await
.unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data2);
let scenario3 = DBScenario {
scenario_name: "Data in open chunk of mutable buffer, and one chunk of read buffer".into(),
db,
@@ -329,10 +330,9 @@ pub async fn make_two_chunk_scenarios(

// in 2 read buffer chunks
let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, data1).unwrap();
write_lp(&db, data1);
db.rollover_partition(partition_key).await.unwrap();
writer.write_lp_string(&db, data2).unwrap();
write_lp(&db, data2);
db.rollover_partition(partition_key).await.unwrap();

db.load_chunk_to_read_buffer(partition_key, 0)

@@ -3,11 +3,11 @@
//! wired all the pieces together (as well as ensure any particularly
//! important SQL does not regress)

#![allow(unused_imports, dead_code, unused_macros)]

use super::scenarios::*;
use arrow_deps::{
arrow::record_batch::RecordBatch, assert_table_eq, datafusion::physical_plan::collect,
};
use query::{exec::Executor, frontend::sql::SQLQueryPlanner};
use arrow_deps::{arrow::record_batch::RecordBatch, assert_batches_sorted_eq};
use query::frontend::sql::SQLQueryPlanner;
use std::sync::Arc;

/// runs table_names(predicate) and compares it to the expected
@@ -25,16 +25,16 @@ macro_rules! run_sql_test_case {
println!("Running scenario '{}'", scenario_name);
println!("SQL: '{:#?}'", sql);
let planner = SQLQueryPlanner::default();
let executor = Executor::new();
let executor = db.executor();

let physical_plan = planner
.query(db, &sql, &executor)
.await
.query(db, &sql, executor.as_ref())
.expect("built plan successfully");

let results: Vec<RecordBatch> = collect(physical_plan).await.expect("Running plan");
let results: Vec<RecordBatch> =
executor.collect(physical_plan).await.expect("Running plan");

assert_table_eq!($EXPECTED_LINES, &results);
assert_batches_sorted_eq!($EXPECTED_LINES, &results);
}
};
}
@@ -278,7 +278,7 @@ async fn sql_select_from_system_tables() {
"+----+---------------+-------------------+-----------------+",
"| id | partition_key | storage | estimated_bytes |",
"+----+---------------+-------------------+-----------------+",
"| 0 | 1970-01-01T00 | OpenMutableBuffer | 493 |",
"| 0 | 1970-01-01T00 | OpenMutableBuffer | 453 |",
"+----+---------------+-------------------+-----------------+",
];
run_sql_test_case!(
@@ -291,13 +291,13 @@ async fn sql_select_from_system_tables() {
"+---------------+------------+-------------+-------+",
"| partition_key | table_name | column_name | count |",
"+---------------+------------+-------------+-------+",
"| 1970-01-01T00 | h2o | state | 3 |",
"| 1970-01-01T00 | h2o | city | 3 |",
"| 1970-01-01T00 | h2o | other_temp | 2 |",
"| 1970-01-01T00 | h2o | state | 3 |",
"| 1970-01-01T00 | h2o | temp | 1 |",
"| 1970-01-01T00 | h2o | time | 3 |",
"| 1970-01-01T00 | h2o | other_temp | 2 |",
"| 1970-01-01T00 | o2 | state | 2 |",
"| 1970-01-01T00 | o2 | city | 1 |",
"| 1970-01-01T00 | o2 | state | 2 |",
"| 1970-01-01T00 | o2 | temp | 2 |",
"| 1970-01-01T00 | o2 | time | 2 |",
"| 1970-01-01T00 | o2 | reading | 1 |",

@@ -1,5 +1,7 @@
//! Tests for the table_names implementation

#![allow(unused_imports, dead_code, unused_macros)]

use arrow_deps::arrow::datatypes::DataType;
use internal_types::{schema::builder::SchemaBuilder, selection::Selection};
use query::{Database, PartitionChunk};

@@ -4,7 +4,7 @@ use data_types::{
DatabaseName,
};
use object_store::{memory::InMemory, ObjectStore};
use query::Database;
use query::{exec::Executor, Database};

use crate::{db::Db, JobRegistry};
use std::{num::NonZeroU32, sync::Arc};
@@ -13,11 +13,25 @@ use std::{num::NonZeroU32, sync::Arc};
pub fn make_db() -> Db {
let server_id: NonZeroU32 = NonZeroU32::new(1).unwrap();
let object_store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let exec = Arc::new(Executor::new(1));

Db::new(
DatabaseRules::new(DatabaseName::new("placeholder").unwrap()),
server_id,
object_store,
exec,
None, // wal buffer
Arc::new(JobRegistry::new()),
)
}

pub fn make_database(server_id: NonZeroU32, object_store: Arc<ObjectStore>, db_name: &str) -> Db {
let exec = Arc::new(Executor::new(1));
Db::new(
DatabaseRules::new(DatabaseName::new(db_name.to_string()).unwrap()),
server_id,
object_store,
exec,
None, // wal buffer
Arc::new(JobRegistry::new()),
)

@@ -273,12 +273,13 @@ mod tests {
};

use super::*;
use crate::db::test_helpers::write_lp;
use data_types::database_rules::DatabaseRules;
use data_types::DatabaseName;
use futures::TryStreamExt;
use mutable_buffer::chunk::Chunk as ChunkWB;
use object_store::memory::InMemory;
use query::{test::TestLPWriter, Database};
use query::{exec::Executor, Database};
use tracker::MemRegistry;

#[tokio::test]
@@ -291,8 +292,7 @@ mem,host=A,region=west used=45 1
"#;

let db = make_db();
let mut writer = TestLPWriter::default();
writer.write_lp_string(&db, &lp).unwrap();
write_lp(&db, &lp);

let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let (tx, rx) = tokio::sync::oneshot::channel();
@@ -354,9 +354,7 @@ mem,host=A,region=west used=45 1
let registry = MemRegistry::new();
let store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let chunk = Arc::new(DBChunk::MutableBuffer {
chunk: Arc::new(ChunkWB::new(11, &registry)),
partition_key: Arc::new("key".to_string()),
open: false,
chunk: ChunkWB::new(11, &registry).snapshot(),
});
let mut metadata_path = store.new_path();
metadata_path.push_dir("meta");
@@ -393,11 +391,13 @@ mem,host=A,region=west used=45 1
pub fn make_db() -> Db {
let object_store = Arc::new(ObjectStore::new_in_memory(InMemory::new()));
let server_id = std::num::NonZeroU32::new(1).unwrap();
let exec = Arc::new(Executor::new(1));

Db::new(
DatabaseRules::new(DatabaseName::new("placeholder").unwrap()),
server_id,
object_store,
exec,
None, // wal buffer
Arc::new(JobRegistry::new()),
)

@@ -105,6 +105,10 @@ struct Create {
#[structopt(long)]
drop_non_persisted: bool,

/// Persists chunks to object storage.
#[structopt(long)]
persist: bool,

/// Do not allow writing new data to this database
#[structopt(long)]
immutable: bool,
@@ -173,6 +177,7 @@ pub async fn command(url: String, config: Config) -> Result<()> {
buffer_size_hard: command.buffer_size_hard as _,
sort_order: None, // Server-side default
drop_non_persisted: command.drop_non_persisted,
persist: command.persist,
immutable: command.immutable,
}),

@@ -106,6 +106,16 @@ pub struct Config {
#[structopt(long = "--data-dir", env = "INFLUXDB_IOX_DB_DIR")]
pub database_directory: Option<PathBuf>,

/// The number of threads to use for the query worker pool.
///
/// IOx uses `--num-threads` threads for handling API requests and
/// will use a dedicated thread pool with `--num-worker-threads`
/// for running queries.
///
/// If not specified, defaults to the number of cores on the system
#[structopt(long = "--num-worker-threads", env = "INFLUXDB_IOX_NUM_WORKER_THREADS")]
pub num_worker_threads: Option<usize>,

#[structopt(
long = "--object-store",
env = "INFLUXDB_IOX_OBJECT_STORE",

@@ -10,7 +10,10 @@ use object_store::{
};
use observability_deps::tracing::{self, error, info, warn, Instrument};
use panic_logging::SendPanicsToTracing;
use server::{ConnectionManagerImpl as ConnectionManager, Server as AppServer};
use server::{
ConnectionManagerImpl as ConnectionManager, Server as AppServer,
ServerConfig as AppServerConfig,
};
use snafu::{ResultExt, Snafu};
use std::{convert::TryFrom, fs, net::SocketAddr, path::PathBuf, sync::Arc};

@@ -124,9 +127,20 @@ pub async fn main(logging_level: LoggingLevel, config: Config) -> Result<()> {

let object_store = ObjectStore::try_from(&config)?;
let object_storage = Arc::new(object_store);
let server_config = AppServerConfig::new(object_storage);

let server_config = if let Some(n) = config.num_worker_threads {
info!(
num_worker_threads = n,
"Using specified number of worker threads"
);
server_config.with_num_worker_threads(n)
} else {
server_config
};

let connection_manager = ConnectionManager {};
let app_server = Arc::new(AppServer::new(connection_manager, object_storage));
let app_server = Arc::new(AppServer::new(connection_manager, server_config));

// if this ID isn't set the server won't be usable until this is set via an API
// call

@@ -12,7 +12,6 @@

// Influx crates
use super::super::commands::metrics;
use arrow_deps::datafusion::physical_plan::collect;
use data_types::{
http::WalMetadataQuery,
names::{org_and_bucket_to_database, OrgBucketMappingError},
@@ -32,7 +31,7 @@ use http::header::{CONTENT_ENCODING, CONTENT_TYPE};
use hyper::{Body, Method, Request, Response, StatusCode};
use observability_deps::{
opentelemetry::KeyValue,
tracing::{self, debug, error, info},
tracing::{self, debug, error},
};
use routerify::{prelude::*, Middleware, RequestInfo, Router, RouterError, RouterService};
use serde::Deserialize;
@@ -312,11 +311,11 @@ where
Router::builder()
.data(server)
.middleware(Middleware::pre(|req| async move {
info!(request = ?req, "Processing request");
debug!(request = ?req, "Processing request");
Ok(req)
}))
.middleware(Middleware::post(|res| async move {
info!(response = ?res, "Successfully processed request");
debug!(response = ?res, "Successfully processed request");
Ok(res)
})) // this endpoint is for API backward compatibility with InfluxDB 2.x
.post("/api/v2/write", write::<M>)
@@ -523,12 +522,12 @@ async fn query<M: ConnectionManager + Send + Sync + Debug + 'static>(

let physical_plan = planner
.query(db, &q, executor.as_ref())
.await
.context(PlanningSQLQuery { query: &q })?;

// TODO: stream read results out rather than rendering the
// whole thing in mem
let batches = collect(physical_plan)
let batches = executor
.collect(physical_plan)
.await
.map_err(|e| Box::new(e) as _)
.context(Query { db_name })?;
@@ -733,27 +732,24 @@ mod tests {
use std::net::{IpAddr, Ipv4Addr, SocketAddr};

use arrow_deps::{arrow::record_batch::RecordBatch, assert_table_eq};
use query::exec::Executor;
use reqwest::{Client, Response};

use data_types::{
database_rules::{DatabaseRules, WalBufferConfig, WalBufferRollover},
wal::WriterSummary,
DatabaseName,
};
use data_types::{database_rules::DatabaseRules, DatabaseName};
use object_store::{memory::InMemory, ObjectStore};
use serde::de::DeserializeOwned;
use server::{db::Db, ConnectionManagerImpl};
use server::{db::Db, ConnectionManagerImpl, ServerConfig as AppServerConfig};
use std::num::NonZeroU32;
use test_helpers::assert_contains;

fn config() -> AppServerConfig {
AppServerConfig::new(Arc::new(ObjectStore::new_in_memory(InMemory::new())))
.with_num_worker_threads(1)
}

#[tokio::test]
async fn test_health() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
let server_url = test_server(Arc::clone(&test_storage));
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();
let response = client.get(&format!("{}/health", server_url)).send().await;
@@ -764,20 +760,16 @@ mod tests {

#[tokio::test]
async fn test_write() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -798,7 +790,7 @@ mod tests {
check_response("write", response, StatusCode::NO_CONTENT, "").await;

// Check that the data got into the right bucket
let test_db = test_storage
let test_db = app_server
.db(&DatabaseName::new("MyOrg_MyBucket").unwrap())
.expect("Database exists");

@@ -816,20 +808,16 @@ mod tests {
#[tokio::test]
async fn test_write_metrics() {
metrics::init_metrics_for_test();
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MetricsOrg_MetricsBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -878,20 +866,16 @@ mod tests {
/// returns a client for communicating with the server, and the server
/// endpoint
async fn setup_test_data() -> (Client, String) {
let test_storage: Arc<AppServer<ConnectionManagerImpl>> = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -1015,20 +999,16 @@ mod tests {

#[tokio::test]
async fn test_gzip_write() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();
let lp_data = "h2o_temperature,location=santa_monica,state=CA surface_degrees=65.2,bottom_degrees=50.4 1568756160";
@@ -1049,7 +1029,7 @@ mod tests {
check_response("gzip_write", response, StatusCode::NO_CONTENT, "").await;

// Check that the data got into the right bucket
let test_db = test_storage
let test_db = app_server
.db(&DatabaseName::new("MyOrg_MyBucket").unwrap())
.expect("Database exists");

@@ -1067,20 +1047,16 @@ mod tests {

#[tokio::test]
async fn write_to_invalid_database() {
let test_storage = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
test_storage.set_id(NonZeroU32::new(1).unwrap()).unwrap();
test_storage
let app_server = Arc::new(AppServer::new(ConnectionManagerImpl {}, config()));
app_server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
app_server
.create_database(
DatabaseRules::new(DatabaseName::new("MyOrg_MyBucket").unwrap()),
test_storage.require_id().unwrap(),
Arc::clone(&test_storage.store),
app_server.require_id().unwrap(),
)
.await
.unwrap();
let server_url = test_server(Arc::clone(&test_storage));
let server_url = test_server(Arc::clone(&app_server));

let client = Client::new();

@@ -1103,115 +1079,6 @@ mod tests {
.await;
}

#[tokio::test]
async fn get_wal_meta() {
let server = Arc::new(AppServer::new(
ConnectionManagerImpl {},
Arc::new(ObjectStore::new_in_memory(InMemory::new())),
));
server.set_id(NonZeroU32::new(1).unwrap()).unwrap();
let server_url = test_server(Arc::clone(&server));

let database_name = "foo_bar";
let rules = DatabaseRules {
name: DatabaseName::new(database_name).unwrap(),
partition_template: Default::default(),
wal_buffer_config: Some(WalBufferConfig {
buffer_size: 500,
segment_size: 10,
buffer_rollover: WalBufferRollover::ReturnError,
store_segments: true,
close_segment_after: None,
}),
lifecycle_rules: Default::default(),
shard_config: None,
};

server
.create_database(
rules,
server.require_id().unwrap(),
Arc::clone(&server.store),
)
.await
.unwrap();

let base_url = format!(
"{}/iox/api/v1/databases/{}/wal/meta",
server_url, database_name
);

let client = Client::new();

let r1: WalMetadataResponse = check_json_response(&client, &base_url, StatusCode::OK).await;

let lines: std::result::Result<Vec<_>, _> = influxdb_line_protocol::parse_lines(
"cpu,host=A,region=west usage_system=64i 1590488773254420000",
)
.collect();

server
.write_lines(database_name, &lines.unwrap())
.await
.unwrap();

let r2: WalMetadataResponse = check_json_response(&client, &base_url, StatusCode::OK).await;

let limit_1 = serde_urlencoded::to_string(&WalMetadataQuery {
limit: Some(1),
newer_than: None,
offset: None,
})
.unwrap();
let limit_url = format!("{}?{}", base_url, limit_1);

let r3: WalMetadataResponse =
check_json_response(&client, &limit_url, StatusCode::OK).await;

let limit_future = serde_urlencoded::to_string(&WalMetadataQuery {
limit: None,
offset: None,
newer_than: Some(chrono::Utc::now() + chrono::Duration::seconds(5)),
})
.unwrap();
let future_url = format!("{}?{}", base_url, limit_future);

let r4: WalMetadataResponse =
check_json_response(&client, &future_url, StatusCode::OK).await;

// No data written yet - expect no results
assert_eq!(r1.segments.len(), 1);
assert_eq!(r1.segments[0].size, 0);
assert_eq!(r1.segments[0].writers.len(), 0);

// The WAL segment size is less than the line size
// We therefore expect an open and a closed segment in that order
// With the closed segment containing the written data
// And the open segment containing no data
assert_eq!(r2.segments.len(), 2);
assert_eq!(r2.segments[0].size, 0);
assert!(r2.segments[0].created_at >= r2.segments[1].created_at);

assert!(r2.segments[1].persisted.is_none());
assert_eq!(r2.segments[1].size, 368);
assert_eq!(r2.segments[1].writers.len(), 1);
assert_eq!(
r2.segments[1].writers.values().next().unwrap(),
&WriterSummary {
start_sequence: 1,
end_sequence: 1,
missing_sequence: false
}
);

// Query limited to a single segment - expect only the most recent segment
assert_eq!(r3.segments.len(), 1);
assert_eq!(r3.segments[0], r2.segments[0]);

// Requesting segments from future - expect no results
assert_eq!(r4.segments.len(), 0);
}

fn get_content_type(response: &Result<Response, reqwest::Error>) -> String {
if let Ok(response) = response {
response
@@ -1250,6 +1117,7 @@ mod tests {
}
}

#[allow(dead_code)]
async fn check_json_response<T: DeserializeOwned + Eq + Debug>(
client: &Client,
url: &str,
@@ -1291,9 +1159,9 @@ mod tests {
/// Run the specified SQL query and return formatted results as a string
async fn run_query(db: Arc<Db>, query: &str) -> Vec<RecordBatch> {
let planner = SQLQueryPlanner::default();
let executor = Executor::new();
let physical_plan = planner.query(db, query, &executor).await.unwrap();
let executor = db.executor();
let physical_plan = planner.query(db, query, executor.as_ref()).unwrap();

collect(physical_plan).await.unwrap()
executor.collect(physical_plan).await.unwrap()
}
}

@@ -23,6 +23,11 @@ pub fn default_server_error_handler(error: server::Error) -> tonic::Status {
description: source.to_string(),
}
.into(),
Error::DecodingEntry { source } => FieldViolation {
field: "entry".into(),
description: source.to_string(),
}
.into(),
error => {
error!(?error, "Unexpected error");
InternalError {}.into()

@@ -1,3 +1,4 @@
//! Implements the native gRPC IOx query API using Arrow Flight
use std::{pin::Pin, sync::Arc};

use futures::Stream;
@@ -19,7 +20,6 @@ use arrow_deps::{
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
},
datafusion::physical_plan::collect,
};
use data_types::{DatabaseName, DatabaseNameError};
use query::{frontend::sql::SQLQueryPlanner, DatabaseStore};
@@ -157,15 +157,17 @@ where
let planner = SQLQueryPlanner::default();
let executor = self.server.executor();

let physical_plan = planner
.query(db, &read_info.sql_query, &executor)
.await
.context(PlanningSQLQuery {
query: &read_info.sql_query,
})?;
let physical_plan =
planner
.query(db, &read_info.sql_query, &executor)
.context(PlanningSQLQuery {
query: &read_info.sql_query,
})?;

// execute the query
let results = collect(Arc::clone(&physical_plan))
let results = executor
.new_context()
.collect(Arc::clone(&physical_plan))
.await
.map_err(|e| Box::new(e) as _)
.context(Query {

@@ -126,13 +126,8 @@ where
Some(id) => id,
None => return Err(NotFound::default().into()),
};
let object_store = Arc::clone(&self.server.store);

match self
.server
.create_database(rules, server_id, object_store)
.await
{
match self.server.create_database(rules, server_id).await {
Ok(_) => Ok(Response::new(CreateDatabaseResponse {})),
Err(Error::DatabaseAlreadyExists { db_name }) => {
return Err(AlreadyExists {

@@ -714,7 +714,6 @@ where

let plan = planner
.table_names(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingTables { db_name })?;
let executor = db_store.executor();
@@ -765,7 +764,6 @@ where

let tag_key_plan = planner
.tag_keys(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingColumns {
db_name: db_name.as_str(),
@@ -825,7 +823,6 @@ where

let tag_value_plan = planner
.tag_values(db.as_ref(), tag_name, predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingTagValues { db_name, tag_name })?;

@@ -882,7 +879,6 @@ where

let series_plan = planner
.read_filter(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(PlanningFilteringSeries { db_name })?;

@@ -968,14 +964,10 @@ where

let grouped_series_set_plan = match gby_agg {
GroupByAndAggregate::Columns { agg, group_columns } => {
planner
.read_group(db.as_ref(), predicate, agg, &group_columns)
.await
planner.read_group(db.as_ref(), predicate, agg, &group_columns)
}
GroupByAndAggregate::Window { agg, every, offset } => {
planner
.read_window_aggregate(db.as_ref(), predicate, agg, every, offset)
.await
planner.read_window_aggregate(db.as_ref(), predicate, agg, every, offset)
}
};
let grouped_series_set_plan = grouped_series_set_plan
@@ -1039,7 +1031,6 @@ where

let field_list_plan = planner
.field_columns(db.as_ref(), predicate)
.await
.map_err(|e| Box::new(e) as _)
.context(ListingFields { db_name })?;

@@ -47,6 +47,23 @@ where
let lines_written = lp_line_count as u64;
Ok(Response::new(WriteResponse { lines_written }))
}

async fn write_entry(
&self,
request: tonic::Request<WriteEntryRequest>,
) -> Result<tonic::Response<WriteEntryResponse>, tonic::Status> {
let request = request.into_inner();
if request.entry.is_empty() {
return Err(FieldViolation::required("entry").into());
}

self.server
.write_entry(&request.db_name, request.entry)
.await
.map_err(default_server_error_handler)?;

Ok(Response::new(WriteEntryResponse {}))
}
}

/// Instantiate the write service

|
|||
partition_key: "cpu".into(),
|
||||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer as i32,
|
||||
estimated_bytes: 145,
|
||||
estimated_bytes: 137,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closing: None,
|
||||
|
@ -286,7 +286,7 @@ async fn test_chunk_get() {
|
|||
partition_key: "disk".into(),
|
||||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer as i32,
|
||||
estimated_bytes: 107,
|
||||
estimated_bytes: 103,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closing: None,
|
||||
|
@ -452,7 +452,7 @@ async fn test_list_partition_chunks() {
|
|||
partition_key: "cpu".into(),
|
||||
id: 0,
|
||||
storage: ChunkStorage::OpenMutableBuffer as i32,
|
||||
estimated_bytes: 145,
|
||||
estimated_bytes: 137,
|
||||
time_of_first_write: None,
|
||||
time_of_last_write: None,
|
||||
time_closing: None,
|
||||
|
|
|
@@ -191,7 +191,7 @@ async fn test_get_chunks() {
.and(predicate::str::contains(
r#""storage": "OpenMutableBuffer","#,
))
.and(predicate::str::contains(r#""estimated_bytes": 145"#))
.and(predicate::str::contains(r#""estimated_bytes": 137"#))
// Check for a non empty timestamp such as
// "time_of_first_write": "2021-03-30T17:11:10.723866Z",
.and(predicate::str::contains(r#""time_of_first_write": "20"#));

Binary file not shown.

@@ -7,7 +7,7 @@ description = "Utilities for tracking resource utilisation within IOx"

[dependencies]

futures = "0.3.7"
futures = "0.3"
hashbrown = "0.9.1"
observability_deps = { path = "../observability_deps" }
pin-project = "1.0"

@@ -7,7 +7,7 @@ edition = "2018"
[dependencies] # In alphabetical order
byteorder = "1.3.4"
crc32fast = "1.2.0"
futures = "0.3.4"
futures = "0.3"
itertools = "0.9.0"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
regex = "1.3.7"