From 6d793218377a36bada47d5004c517ec442be33ee Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Fri, 31 Jul 2020 11:28:11 +0100
Subject: [PATCH 01/73] feat: add multi-column Quicksort for `[Packers]`

---
 delorean_table/src/packers.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/delorean_table/src/packers.rs b/delorean_table/src/packers.rs
index a0687afe5c..4d73da810f 100644
--- a/delorean_table/src/packers.rs
+++ b/delorean_table/src/packers.rs
@@ -215,7 +215,7 @@ where
 impl<T> Packer<T>
 where
-    T: Default + Clone,
+    T: Default + Clone + std::fmt::Debug,
 {
     pub fn new() -> Self {
         Self { values: Vec::new() }
@@ -333,7 +333,7 @@ where
 impl<'a, T> Iterator for PackerIterator<'a, T>
 where
-    T: Default + Clone,
+    T: Default + Clone + std::fmt::Debug,
 {
     type Item = Option<&'a T>;
@@ -365,7 +365,7 @@ where
 // `Packer` value, e.g., `Packer<i64>`.
 impl<T> std::convert::From<Vec<Option<T>>> for Packer<T>
 where
-    T: Default + Clone,
+    T: Default + Clone + std::fmt::Debug,
 {
     fn from(values: Vec<Option<T>>) -> Self {
         let mut packer = Self::new();

From 238e9895551d7721126a91b4812dbb2440f56188 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Wed, 5 Aug 2020 10:05:26 +0100
Subject: [PATCH 02/73] feat: add ability to emit Packer values in chunks

---
 delorean_table/src/packers.rs | 44 +++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/delorean_table/src/packers.rs b/delorean_table/src/packers.rs
index 4d73da810f..fc1223b30d 100644
--- a/delorean_table/src/packers.rs
+++ b/delorean_table/src/packers.rs
@@ -7,6 +7,7 @@
 // soon... We'll see how long that actually takes...
 use core::iter::Iterator;
 use std::iter;
+use std::slice::Chunks;

 use delorean_arrow::parquet::data_type::ByteArray;
 use std::default::Default;
@@ -44,7 +45,16 @@ macro_rules! typed_packer_accessors {
     };
 }

-impl Packers {
+impl<'a> Packers {
+    pub fn chunk_values(&self, chunk_size: usize) -> PackerChunker<'_> {
+        match self {
+            Self::Float(p) => PackerChunker::Float(p.values.chunks(chunk_size)),
+            Self::Integer(p) => PackerChunker::Integer(p.values.chunks(chunk_size)),
+            Self::String(p) => PackerChunker::String(p.values.chunks(chunk_size)),
+            Self::Boolean(p) => PackerChunker::Boolean(p.values.chunks(chunk_size)),
+        }
+    }
+
     /// Create a String Packers with repeated values.
     pub fn from_elem_str(v: &str, n: usize) -> Self {
         Self::String(Packer::from(vec![ByteArray::from(v); n]))
@@ -205,6 +215,15 @@ impl std::convert::From>>> for Packers {
     }
 }

+/// PackerChunker represents chunkable Packer variants.
+#[derive(Debug)]
+pub enum PackerChunker<'a> {
+    Float(Chunks<'a, Option<f64>>),
+    Integer(Chunks<'a, Option<i64>>),
+    String(Chunks<'a, Option<ByteArray>>),
+    Boolean(Chunks<'a, Option<bool>>),
+}
+
 #[derive(Debug, Default, PartialEq)]
 pub struct Packer<T>
 where
@@ -259,7 +278,13 @@ where
         &self.values
     }

-    /// returns a binary vector indicating which indexes have null values.
+    /// Returns an iterator that emits `chunk_size` values from the Packer until
+    /// all values are returned.
+    pub fn chunk_values(&self, chunk_size: usize) -> std::slice::Chunks<'_, Option<T>> {
+        self.values.chunks(chunk_size)
+    }
+
+    /// Returns a binary vector indicating which indexes have null values.
    pub fn def_levels(&self) -> Vec<i16> {
         self.values
             .iter()
@@ -376,6 +401,21 @@ where
     }
 }

+// Convert `&[]`, e.g., `&[Option<i64>]` into the appropriate
+// `Packer` value, e.g., `Packer<i64>`.
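+//
+// A small usage sketch (hypothetical values):
+//
+//     let packer = Packer::<i64>::from(&[Some(1), None, Some(3)][..]);
+//     assert_eq!(packer.num_rows(), 3);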
+impl<T> std::convert::From<&[Option<T>]> for Packer<T>
+where
+    T: Default + Clone + std::fmt::Debug,
+{
+    fn from(values: &[Option<T>]) -> Self {
+        let mut packer = Self::new();
+        for v in values {
+            packer.push_option(v.clone());
+        }
+        packer
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;

From cb3e948ca0d3d922fe1a79ed9bc1532a83696211 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Wed, 5 Aug 2020 10:06:35 +0100
Subject: [PATCH 03/73] feat: TO REMOVE - TSM -> Arrow

---
 delorean_ingest/Cargo.toml |   2 +-
 delorean_ingest/src/lib.rs | 281 +++++++++++++++++++++++++++++++++++--
 2 files changed, 272 insertions(+), 11 deletions(-)

diff --git a/delorean_ingest/Cargo.toml b/delorean_ingest/Cargo.toml
index b1a26ea873..e582e8848b 100644
--- a/delorean_ingest/Cargo.toml
+++ b/delorean_ingest/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2018"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-
+arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" }
 snafu = "0.6.2"
 env_logger = "0.7.1"
 log = "0.4.8"

diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs
index 9f8ff4cfcc..9a0223e8b7 100644
--- a/delorean_ingest/src/lib.rs
+++ b/delorean_ingest/src/lib.rs
@@ -11,7 +11,7 @@
 use delorean_line_parser::{FieldValue, ParsedLine};
 use delorean_table::{
-    packers::{Packer, Packers},
+    packers::{Packer, PackerChunker, Packers},
     ByteArray, DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError,
 };
 use delorean_table_schema::{DataType, Schema, SchemaBuilder};
@@ -508,6 +508,154 @@ fn pack_lines<'a>(schema: &Schema, lines: &[ParsedLine<'a>]) -> Vec<Packers> {
     packers
 }

+use arrow::array;
+use arrow::datatypes;
+use arrow::ipc::writer;
+use arrow::record_batch;
+use std::fs::File;
+use std::sync::Arc;
+
+fn arrow_datatype(datatype: DataType) -> datatypes::DataType {
+    match datatype {
+        DataType::Float => datatypes::DataType::Float64,
+        DataType::Integer => datatypes::DataType::Int64,
+        DataType::String => datatypes::DataType::Utf8,
+        // DataType::String => datatypes::DataType::Dictionary(
+        //     std::boxed::Box::new(datatypes::DataType::Int16),
+        //     std::boxed::Box::new(datatypes::DataType::Utf8),
+        // ),
+        DataType::Boolean => datatypes::DataType::Boolean,
+        DataType::Timestamp => datatypes::DataType::Int64,
+    }
+}
+
+fn write_arrow_file(parquet_schema: Schema, packers: Vec<Packers>) -> Result<(), Error> {
+    let file = File::create("/tmp/http_api_requests_total.arrow").unwrap();
+
+    let mut record_batch_fields: Vec<datatypes::Field> = vec![];
+    // no default() on Field...
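+    // (`datatypes::Field` has no `Default` impl, so the vector is grown with a
+    // throwaway placeholder ("foo", Int64) and each slot is then overwritten
+    // by column index in the loop below; no placeholder survives.)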
+ record_batch_fields.resize( + parquet_schema.get_col_defs().len(), + datatypes::Field::new("foo", datatypes::DataType::Int64, false), + ); + + for col_def in parquet_schema.get_col_defs() { + let nullable = col_def.data_type != DataType::Timestamp; + // if col_def.data_type == DataType::Timestamp { + // nullable = false; + // } else { + // nullable = true; + // } + + record_batch_fields[col_def.index as usize] = datatypes::Field::new( + col_def.name.as_str(), + arrow_datatype(col_def.data_type), + nullable, + ); + } + println!("{:?}", record_batch_fields); + println!("{:?}", parquet_schema.get_col_defs()); + let schema = datatypes::Schema::new(record_batch_fields); + + let mut writer = writer::StreamWriter::try_new(file, &schema).unwrap(); + + // let num_rows = packers[0].num_rows(); + let batch_size = 60_000; + + let mut packer_chunkers: Vec> = vec![]; + for packer in &packers { + packer_chunkers.push(packer.chunk_values(batch_size)); + } + + loop { + let mut chunked_packers: Vec = Vec::with_capacity(packers.len()); + for chunker in &mut packer_chunkers { + match chunker { + PackerChunker::Float(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::Float(Packer::from(chunk))); + } + } + PackerChunker::Integer(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::Integer(Packer::from(chunk))); + } + } + PackerChunker::String(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::String(Packer::from(chunk))); + } + } + PackerChunker::Boolean(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::Boolean(Packer::from(chunk))); + } + } + } + } + + if chunked_packers.is_empty() { + break; + } + + // let sort = [0, 7, 6, 12]; + // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; + let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; + delorean_table::sorter::sort(&mut chunked_packers, &sort).unwrap(); + + println!( + "Writing {:?} packers with size: {:?}", + chunked_packers.len(), + chunked_packers[0].num_rows() + ); + write_arrow_batch(&mut writer, Arc::new(schema.clone()), chunked_packers); + } + + writer.finish().unwrap(); + Ok(()) +} + +fn write_arrow_batch( + w: &mut writer::StreamWriter, + schema: Arc, + packers: Vec, +) { + let mut record_batch_arrays: Vec = vec![]; + + for packer in packers { + match packer { + Packers::Float(p) => { + record_batch_arrays.push(Arc::new(array::Float64Array::from(p.values().to_vec()))); + } + Packers::Integer(p) => { + record_batch_arrays.push(Arc::new(array::Int64Array::from(p.values().to_vec()))); + } + Packers::String(p) => { + let mut builder = array::StringBuilder::new(p.num_rows()); + for v in p.values() { + match v { + Some(v) => { + builder.append_value(v.as_utf8().unwrap()).unwrap(); + } + None => { + builder.append_null().unwrap(); + } + } + } + let array = builder.finish(); + record_batch_arrays.push(Arc::new(array)); + } + Packers::Boolean(p) => { + let array = array::BooleanArray::from(p.values().to_vec()); + record_batch_arrays.push(Arc::new(array)); + } + } + } + + let record_batch = record_batch::RecordBatch::try_new(schema, record_batch_arrays).unwrap(); + w.write(&record_batch).unwrap(); +} + /// Converts one or more TSM files into the delorean_table internal columnar /// data format and then passes that converted data to a `DeloreanTableWriter`. 
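// A reading counterpart for the stream written by `write_arrow_file`; a
// sketch using this Arrow revision's IPC API, with the path hard-coded above:
//
//     let f = std::fs::File::open("/tmp/http_api_requests_total.arrow").unwrap();
//     let mut reader = arrow::ipc::reader::StreamReader::try_new(f).unwrap();
//     while let Some(batch) = reader.next_batch().unwrap() {
//         // each RecordBatch holds at most 60_000 rows, sorted by the sort key
//     }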
pub struct TSMFileConverter { @@ -571,18 +719,131 @@ impl TSMFileConverter { match next_measurement { Some(mut table) => { + if table.name != "http_api_requests_total" { + continue; + } // convert (potentially merged) measurement.. - let (schema, packed_columns) = + let (schema, mut packed_columns) = Self::process_measurement_table(&mut block_reader, &mut table)?; - let mut table_writer = self - .table_writer_source - .next_writer(&schema) - .context(WriterCreation)?; - table_writer - .write_batch(&packed_columns) - .context(WriterCreation)?; - table_writer.close().context(WriterCreation)?; + // println!("col def {:?}", schema.get_col_defs()); + // // cardinality + // for (i, col) in packed_columns.iter().enumerate() { + // println!("processing column {:?}", i); + // if let Packers::String(p) = col { + // let mut set: std::collections::BTreeSet<_> = BTreeSet::new(); + // for v in p.iter() { + // if let Some(v) = v { + // set.insert(String::from(v.as_utf8().unwrap())); + // } + // } + // println!("Cardinality for col is {:?}", set.len()); + // } + // } + // col def [ColumnDefinition { name: "env", index: 0, data_type: String }, + // ColumnDefinition { name: "handler", index: 1, data_type: String }, + // ColumnDefinition { name: "host", index: 2, data_type: String }, + // ColumnDefinition { name: "hostname", index: 3, data_type: String }, + // ColumnDefinition { name: "method", index: 4, data_type: String }, + // ColumnDefinition { name: "nodename", index: 5, data_type: String }, + // ColumnDefinition { name: "path", index: 6, data_type: String }, + // ColumnDefinition { name: "role", index: 7, data_type: String }, + // ColumnDefinition { name: "status", index: 8, data_type: String }, + // ColumnDefinition { name: "url", index: 9, data_type: String }, + // ColumnDefinition { name: "user_agent", index: 10, data_type: String }, + // ColumnDefinition { name: "counter", index: 11, data_type: Float }, + // ColumnDefinition { name: "time", index: 12, data_type: Timestamp }] + // processing column 0 + // Cardinality for col is 8 + // processing column 1 + // Cardinality for col is 8 + // processing column 2 + // Cardinality for col is 3005 + // processing column 3 + // Cardinality for col is 3005 + // processing column 4 + // Cardinality for col is 6 + // processing column 5 + // Cardinality for col is 148 + // processing column 6 + // Cardinality for col is 78 + // processing column 7 + // Cardinality for col is 14 + // processing column 8 + // Cardinality for col is 4 + // processing column 9 + // Cardinality for col is 6 + // processing column 10 + // Cardinality for col is 71 + // processing column 11 + // processing column 12 + // got all card + // println!("got all card"); + + // sort low to high == + // + // status 8 (4) + // method 4 (6) + // url 9 (6) + // env 0 (8) + // handler 1 (8) + // role 7 (14) + // user_agent 10 (71) + // path 6 (78) + // nodename 5 (148) + // host 2 (3005) + // hostname 3 (3005) + // + // time 12 + + if packed_columns.len() < 13 { + continue; + } + + println!("length of column s is {:?}", packed_columns.len()); + // let sort = [0, 7, 6, 12]; + // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; + // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; + let sort = [12]; + println!("Starting sort with {:?}", sort); + let now = std::time::Instant::now(); + + delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); + + println!("verifying order"); + let values = packed_columns[12].i64_packer_mut().values(); + let mut last = values[0]; + for i in 
1..values.len() { + assert!(values[i] >= last); + last = values[i]; + } + println!("finished sort in {:?}", now.elapsed()); + + println!("Writing to arrow file!"); + write_arrow_file(schema, packed_columns).unwrap(); + println!("Done!"); + + // if packed_columns.len() < 13 { + // continue; + // } + // println!("length of column s is {:?}", packed_columns.len()); + // // let sort = [0, 7, 6, 12]; + // // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; + // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; + // println!("Starting sort with {:?}", sort); + // let now = std::time::Instant::now(); + // delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); + // println!("finished sort in {:?}", now.elapsed()); + + // let mut table_writer = self + // .table_writer_source + // .next_writer(&schema) + // .context(WriterCreation)?; + + // table_writer + // .write_batch(&packed_columns) + // .context(WriterCreation)?; + // table_writer.close().context(WriterCreation)?; } None => break, } From aba02cb731f533531ae9f639bcd9ac36330092d2 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 13:47:14 +0100 Subject: [PATCH 04/73] feat: basic store --- .gitignore | 2 +- Cargo.lock | 54 ++++- Cargo.toml | 2 + delorean_mem_qe/Cargo.toml | 16 ++ delorean_mem_qe/benches/encoding.rs | 97 +++++++++ delorean_mem_qe/src/bin/main.rs | 93 +++++++++ delorean_mem_qe/src/column.rs | 233 ++++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 299 ++++++++++++++++++++++++++++ delorean_mem_qe/src/lib.rs | 28 +++ delorean_mem_qe/src/segment.rs | 95 +++++++++ 10 files changed, 915 insertions(+), 4 deletions(-) create mode 100644 delorean_mem_qe/Cargo.toml create mode 100644 delorean_mem_qe/benches/encoding.rs create mode 100644 delorean_mem_qe/src/bin/main.rs create mode 100644 delorean_mem_qe/src/column.rs create mode 100644 delorean_mem_qe/src/encoding.rs create mode 100644 delorean_mem_qe/src/lib.rs create mode 100644 delorean_mem_qe/src/segment.rs diff --git a/.gitignore b/.gitignore index 6bab7d0b55..98a48ecef1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/target +**/target **/*.rs.bk .idea/ .env diff --git a/Cargo.lock b/Cargo.lock index 29c2d3710e..328fb59d2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -104,6 +104,42 @@ dependencies = [ "serde_json", ] +[[package]] +name = "arrow" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +dependencies = [ + "arrow-flight", + "chrono", + "csv", + "flatbuffers", + "hex", + "indexmap", + "lazy_static", + "num 0.3.0", + "prettytable-rs", + "rand", + "regex", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "arrow-flight" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +dependencies = [ + "bytes", + "futures", + "proc-macro2", + "prost", + "prost-derive", + "tokio", + "tonic", + "tonic-build", +] + [[package]] name = "assert-json-diff" version = "1.1.0" @@ -611,7 +647,7 @@ name = "datafusion" version = "2.0.0-SNAPSHOT" source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" dependencies = [ - "arrow", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "chrono", "clap", "crossbeam", @@ -637,6 +673,7 @@ dependencies = [ "delorean_generated_types", 
"delorean_ingest", "delorean_line_parser", + "delorean_mem_qe", "delorean_object_store", "delorean_parquet", "delorean_partitioned_store", @@ -677,7 +714,7 @@ dependencies = [ name = "delorean_arrow" version = "0.1.0" dependencies = [ - "arrow", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "datafusion", "parquet", ] @@ -698,6 +735,7 @@ dependencies = [ name = "delorean_ingest" version = "0.1.0" dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "delorean_line_parser", "delorean_table", "delorean_table_schema", @@ -722,6 +760,16 @@ dependencies = [ "snafu", ] +[[package]] +name = "delorean_mem_qe" +version = "0.1.0" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "croaring", + "delorean_table", + "snafu", +] + [[package]] name = "delorean_object_store" version = "0.1.0" @@ -1982,7 +2030,7 @@ name = "parquet" version = "2.0.0-SNAPSHOT" source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" dependencies = [ - "arrow", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "brotli", "byteorder", "chrono", diff --git a/Cargo.toml b/Cargo.toml index a7988b0734..fbc65ec72c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "delorean_ingest", "delorean_line_parser", "delorean_object_store", + "delorean_mem_qe", "delorean_parquet", "delorean_partitioned_store", "delorean_table", @@ -33,6 +34,7 @@ delorean_arrow = { path = "delorean_arrow" } delorean_generated_types = { path = "delorean_generated_types" } delorean_ingest = { path = "delorean_ingest" } delorean_line_parser = { path = "delorean_line_parser" } +delorean_mem_qe = { path = "delorean_mem_qe" } delorean_parquet = { path = "delorean_parquet" } delorean_partitioned_store = { path = "delorean_partitioned_store" } delorean_table = { path = "delorean_table" } diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml new file mode 100644 index 0000000000..aaf38f1b7a --- /dev/null +++ b/delorean_mem_qe/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "delorean_mem_qe" +version = "0.1.0" +authors = ["Edd Robinson "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +delorean_table = { path = "../delorean_table" } +arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +snafu = "0.6.8" +croaring = "0.4.5" + +[dev-dependencies] + diff --git a/delorean_mem_qe/benches/encoding.rs b/delorean_mem_qe/benches/encoding.rs new file mode 100644 index 0000000000..504ce64c8d --- /dev/null +++ b/delorean_mem_qe/benches/encoding.rs @@ -0,0 +1,97 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; + +const BATCH_SIZES: [usize; 5] = [10, 100, 1_000, 10_000, 100_000]; +const CARDINALITIES: [usize; 4] = [1, 5, 10, 100]; + +fn encoding_drle_row_ids_sorted(c: &mut Criterion) { + benchmark_row_ids( + c, + "encoding_drle_row_ids_sorted", + &BATCH_SIZES, + &CARDINALITIES, + ); +} + +fn benchmark_row_ids( + c: &mut Criterion, + benchmark_group_name: &str, + batch_sizes: &[usize], + cardinalities: &[usize], +) { + let mut group = c.benchmark_group(benchmark_group_name); + for 
&batch_size in batch_sizes { + for &cardinality in cardinalities { + let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); + let values = batch_size / cardinality; + for i in 0..cardinality { + input.push_additional(i.to_string().as_str(), values as u64); + } + group.throughput(Throughput::Bytes(batch_size as u64)); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:?}_{:?}", batch_size, cardinality)), + &input, + |b, input| { + b.iter(|| { + // do work + for i in 0..cardinality { + let ids = input + .row_ids(i.to_string().as_str()) + .collect::>(); + } + }); + }, + ); + } + } + group.finish(); +} + +fn encoding_drle_row_ids_sorted_roaring(c: &mut Criterion) { + benchmark_row_ids_roaring( + c, + "encoding_drle_row_ids_sorted_roaring", + &BATCH_SIZES, + &CARDINALITIES, + ); +} + +fn benchmark_row_ids_roaring( + c: &mut Criterion, + benchmark_group_name: &str, + batch_sizes: &[usize], + cardinalities: &[usize], +) { + let mut group = c.benchmark_group(benchmark_group_name); + for &batch_size in batch_sizes { + for &cardinality in cardinalities { + let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); + let values = batch_size / cardinality; + for i in 0..cardinality { + input.push_additional(i.to_string().as_str(), values as u64); + } + group.throughput(Throughput::Bytes(batch_size as u64)); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:?}_{:?}", batch_size, cardinality)), + &input, + |b, input| { + b.iter(|| { + // do work + for i in 0..cardinality { + let ids = input.row_ids_roaring(i.to_string().as_str()); + } + }); + }, + ); + } + } + group.finish(); +} + +criterion_group!( + benches, + encoding_drle_row_ids_sorted, + encoding_drle_row_ids_sorted_roaring +); +criterion_main!(benches); diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs new file mode 100644 index 0000000000..c7bdc932b3 --- /dev/null +++ b/delorean_mem_qe/src/bin/main.rs @@ -0,0 +1,93 @@ +use std::{fs::File, path::Path}; + +use arrow::record_batch::{RecordBatch, RecordBatchReader}; +use arrow::{array, array::Array, datatypes, ipc}; + +use delorean_mem_qe::column; +use delorean_mem_qe::column::Column; +use delorean_mem_qe::segment::Segment; +use delorean_mem_qe::Store; + +// use snafu::ensure; +use snafu::Snafu; + +#[derive(Snafu, Debug, Clone, Copy, PartialEq)] +pub enum Error { + // #[snafu(display(r#"Too many sort columns specified"#))] +// TooManyColumns, + +// #[snafu(display(r#"Same column specified as sort column multiple times"#))] +// RepeatedColumns { index: usize }, + +// #[snafu(display(r#"Specified column index is out bounds"#))] +// OutOfBoundsColumn { index: usize }, +} + +fn main() { + let mut store = Store::default(); + read_arrow_file(&mut store); + + println!( + "total segments {:?} with total size {:?}", + store.segment_total(), + store.size(), + ); +} + +fn read_arrow_file(store: &mut Store) { + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let mut reader = ipc::reader::StreamReader::try_new(r).unwrap(); + while let Some(batch) = reader.next_batch().unwrap() { + let segment = record_batch_to_segment(&batch).unwrap(); + store.add_segment(segment); + } +} + +fn record_batch_to_segment(rb: &RecordBatch) -> Result { + let mut segment = Segment::default(); + + // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); + for (i, column) in rb.columns().iter().enumerate() { + match *column.data_type() { + 
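            // Each Arrow column is converted by data type below: numeric and
            // timestamp arrays are copied out as dense slices, while string
            // (tag) arrays are collapsed into the run-length dictionary
            // encoding as they are scanned.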
datatypes::DataType::Float64 => { + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), column); + } + datatypes::DataType::Int64 => { + let arr = column.as_any().downcast_ref::().unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), column); + } + datatypes::DataType::Utf8 => { + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); + + let mut column = column::String::default(); + let mut prev = arr.value(0); + let mut count = 1_u64; + for j in 1..arr.len() { + let next = arr.value(j); + if prev == next { + count += 1; + } else { + column.add_additional(prev, count); + prev = next; + count = 1; + } + } + segment.add_column(rb.schema().field(i).name(), Column::String(column)); + } + datatypes::DataType::Boolean => { + panic!("unsupported"); + } + _ => panic!("unsupported datatype"), + } + } + Ok(segment) +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs new file mode 100644 index 0000000000..188d0dd193 --- /dev/null +++ b/delorean_mem_qe/src/column.rs @@ -0,0 +1,233 @@ +use std::convert::From; + +use super::encoding; + +#[derive(Debug)] +pub enum Column { + String(String), + Float(Float), + Integer(Integer), +} + +impl Column { + /// Returns the number of logical rows for the column. + pub fn num_rows(&self) -> usize { + match self { + Column::String(c) => c.meta.num_rows(), + Column::Float(c) => c.meta.num_rows(), + Column::Integer(c) => c.meta.num_rows(), + } + } + + // Returns the size of the segment in bytes. + pub fn size(&self) -> usize { + match self { + Column::String(c) => c.size(), + Column::Float(c) => c.size(), + Column::Integer(c) => c.size(), + } + } +} + +impl From<&[f64]> for Column { + fn from(values: &[f64]) -> Self { + Self::Float(Float::from(values)) + } +} + +impl From<&[i64]> for Column { + fn from(values: &[i64]) -> Self { + Self::Integer(Integer::from(values)) + } +} + +#[derive(Debug, Default)] +pub struct String { + meta: metadata::Str, + + // TODO(edd): this would probably have multiple possible encodings + data: encoding::DictionaryRLE, +} + +impl String { + pub fn add(&mut self, s: &str) { + self.meta.add(s); + self.data.push(s); + } + + pub fn add_additional(&mut self, s: &str, additional: u64) { + self.meta.add(s); + self.data.push_additional(s, additional); + } + + pub fn column_range(&self) -> (&str, &str) { + self.meta.range() + } + + pub fn size(&self) -> usize { + self.meta.size() + self.data.size() + } +} + +#[derive(Debug, Default)] +pub struct Float { + meta: metadata::F64, + + // TODO(edd): compression of float columns + data: encoding::PlainFixed, +} + +impl Float { + pub fn column_range(&self) -> (f64, f64) { + self.meta.range() + } + + pub fn size(&self) -> usize { + self.meta.size() + self.data.size() + } +} + +impl From<&[f64]> for Float { + fn from(values: &[f64]) -> Self { + let len = values.len(); + let mut min = std::f64::MAX; + let mut max = std::f64::MIN; + + // calculate min/max for meta data + for v in values { + min = min.min(*v); + max = max.max(*v); + } + + Self { + meta: metadata::F64::new((min, max), len), + data: encoding::PlainFixed::from(values), + } + } +} + +#[derive(Debug, Default)] +pub struct Integer { + meta: metadata::I64, + + // TODO(edd): compression of integers + data: encoding::PlainFixed, +} + +impl Integer { + pub fn column_range(&self) -> (i64, i64) { + self.meta.range() + } + + pub fn 
size(&self) -> usize { + self.meta.size() + self.data.size() + } +} + +impl From<&[i64]> for Integer { + fn from(values: &[i64]) -> Self { + let len = values.len(); + let mut min = std::i64::MAX; + let mut max = std::i64::MIN; + + // calculate min/max for meta data + for v in values { + min = min.min(*v); + max = max.max(*v); + } + + Self { + meta: metadata::I64::new((min, max), len), + data: encoding::PlainFixed::from(values), + } + } +} + +pub mod metadata { + #[derive(Debug, Default)] + pub struct Str { + range: (String, String), + num_rows: usize, + // sparse_index: BTreeMap, + } + + impl Str { + pub fn add(&mut self, s: &str) { + self.num_rows += 1; + + if self.range.0.as_str() > s { + self.range.0 = s.to_owned(); + } + + if self.range.1.as_str() < s { + self.range.1 = s.to_owned(); + } + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn range(&self) -> (&str, &str) { + (&self.range.0, &self.range.1) + } + + pub fn size(&self) -> usize { + self.range.0.len() + self.range.1.len() + std::mem::size_of::() + } + } + + #[derive(Debug, Default)] + pub struct F64 { + range: (f64, f64), + num_rows: usize, + } + + impl F64 { + pub fn new(range: (f64, f64), rows: usize) -> Self { + Self { + range, + num_rows: rows, + } + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn range(&self) -> (f64, f64) { + self.range + } + + pub fn size(&self) -> usize { + std::mem::size_of::<(f64, f64)>() + std::mem::size_of::() + } + } + + #[derive(Debug, Default)] + pub struct I64 { + range: (i64, i64), + num_rows: usize, + } + + impl I64 { + pub fn new(range: (i64, i64), rows: usize) -> Self { + Self { + range, + num_rows: rows, + } + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn range(&self) -> (i64, i64) { + self.range + } + + pub fn size(&self) -> usize { + std::mem::size_of::<(i64, i64)>() + std::mem::size_of::() + } + } +} diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs new file mode 100644 index 0000000000..b760771cdb --- /dev/null +++ b/delorean_mem_qe/src/encoding.rs @@ -0,0 +1,299 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::iter; + +// TODO(edd): this is just for convenience. In reality one would store nulls +// separately and not use `Option`. +#[derive(Debug, Default)] +pub struct PlainFixedOption { + values: Vec>, +} + +impl PlainFixedOption { + pub fn size(&self) -> usize { + self.values.len() * std::mem::size_of::>() + } +} + +#[derive(Debug, Default)] +// No compression +pub struct PlainFixed { + values: Vec, +} + +impl PlainFixed { + pub fn size(&self) -> usize { + self.values.len() * std::mem::size_of::() + } +} + +impl From<&[i64]> for PlainFixed { + fn from(v: &[i64]) -> Self { + Self { values: v.to_vec() } + } +} + +impl From<&[f64]> for PlainFixed { + fn from(v: &[f64]) -> Self { + Self { values: v.to_vec() } + } +} + +#[derive(Debug, Default)] +pub struct DictionaryRLE { + // stores the mapping between an entry and its assigned index. + map: BTreeMap, + map_size: usize, // TODO(edd) this isn't perfect at all + + // stores tuples where each pair refers to a dictionary entry and the number + // of times the entry repeats. 
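+    // For example, pushing "cpu", "cpu", "mem", "cpu" yields a dictionary of
+    // {"cpu": 0, "mem": 1} and run_lengths of [(0, 2), (1, 1), (0, 1)].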
+ run_lengths: Vec<(usize, u64)>, + run_length_size: usize, + + total: u64, +} + +impl DictionaryRLE { + pub fn new() -> Self { + Self { + map: BTreeMap::new(), + map_size: 0, + run_lengths: Vec::new(), + run_length_size: 0, + total: 0, + } + } + + pub fn push(&mut self, v: &str) { + self.push_additional(v, 1); + } + + pub fn push_additional(&mut self, v: &str, additional: u64) { + self.total += additional; + let idx = self.map.get(v); + match idx { + Some(idx) => { + if let Some((last_idx, rl)) = self.run_lengths.last_mut() { + if last_idx == idx { + // update the existing run-length + *rl += additional; + } else { + // start a new run-length + self.run_lengths.push((*idx, additional)); + self.run_length_size += std::mem::size_of::<(usize, u64)>(); + } + } + } + None => { + // New dictionary entry. + if idx.is_none() { + let idx = self.map.len(); + + self.map.insert(String::from(v), idx); + self.map_size += v.len() + std::mem::size_of::(); + + self.run_lengths.push((idx, additional)); + self.run_length_size += std::mem::size_of::<(usize, u64)>(); + return; + } + } + } + } + + // row_ids returns an iterator over the set of row ids matching the provided + // value. + pub fn row_ids(&self, value: &str) -> impl iter::Iterator { + let mut out: Vec = vec![]; + if let Some(idx) = self.map.get(value) { + let mut index: usize = 0; + for (other_idx, other_rl) in &self.run_lengths { + let start = index; + index += *other_rl as usize; + if other_idx == idx { + out.extend(start..index) + } + } + } + out.into_iter() + } + + // row_ids returns an iterator over the set of row ids matching the provided + // value. + pub fn row_ids_roaring(&self, value: &str) -> croaring::Bitmap { + let mut bm = croaring::Bitmap::create(); + if let Some(idx) = self.map.get(value) { + let mut index: u64 = 0; + for (other_idx, other_rl) in &self.run_lengths { + let start = index; + index += other_rl; + if other_idx == idx { + bm.add_range(start..index); + } + } + } + bm + } + + // row_ids returns an iterator over the set of row ids matching the provided + // value + // pub fn row_ids(&'a self, value: &str) -> impl iter::Iterator { + // if let Some(idx) = self.map.get(value) { + // let mut index: usize = 0; + // return self.run_lengths.iter().flat_map(|(other_idx, other_rl)| { + // let start = index; + // index += *other_rl as usize; + + // if other_idx != idx { + // let iter: Box> = Box::new(iter::empty::()); + // return iter; + // } + // Box::new(start..index) + // }); + // } + + // // I need to return the same type as flatten_map or box the flatten_map return and this one?? + // unreachable!("for now"); + // } + + pub fn dictionary(&self) -> BTreeSet { + self.map.keys().cloned().collect::>() + } + + // get the logical value at the provided index, or None if there is no value + // at index. + pub fn value(&self, index: usize) -> Option<&str> { + if index < self.total as usize { + // build reverse mapping. + let mut idx_value = BTreeMap::new(); + for (k, v) in &self.map { + idx_value.insert(v, k.as_str()); + } + assert_eq!(idx_value.len(), self.map.len()); + + let mut total = 0; + for (idx, rl) in &self.run_lengths { + if total + rl > index as u64 { + return idx_value.get(idx).cloned(); + } + total += rl; + } + } + None + } + + // values materialises a vector of references to all logical values in the + // encoding. + pub fn values(&mut self) -> Vec<&str> { + let mut out = Vec::with_capacity(self.total as usize); + + // build reverse mapping. 
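+        // (entry index -> entry); rebuilt from `map` on every call, costing
+        // O(cardinality) before any rows are materialised.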
+        let mut idx_value = BTreeMap::new();
+        for (k, v) in &self.map {
+            idx_value.insert(v, k.as_str());
+        }
+        assert_eq!(idx_value.len(), self.map.len());
+
+        for (idx, rl) in &self.run_lengths {
+            let &v = idx_value.get(&idx).unwrap();
+            out.extend(iter::repeat(&v).take(*rl as usize));
+        }
+        out
+    }
+
+    pub fn size(&self) -> usize {
+        self.map_size + self.run_length_size
+    }
+}
+
+// TODO(edd): improve perf here....
+impl std::convert::From<Vec<&str>> for DictionaryRLE {
+    fn from(vec: Vec<&str>) -> Self {
+        let mut drle = Self::new();
+        for v in vec {
+            drle.push(v);
+        }
+        drle
+    }
+}
+
+// TODO(edd): improve perf here....
+impl std::convert::From<&delorean_table::Packer<delorean_table::ByteArray>> for DictionaryRLE {
+    fn from(p: &delorean_table::Packer<delorean_table::ByteArray>) -> Self {
+        let mut drle = Self::new();
+        for v in p.values() {
+            let s = v
+                .clone()
+                .unwrap_or_else(|| delorean_table::ByteArray::from("NULL"));
+            drle.push(s.as_utf8().unwrap());
+        }
+        drle
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn dict_rle() {
+        let mut drle = super::DictionaryRLE::new();
+        drle.push("hello");
+        drle.push("hello");
+        drle.push("world");
+        drle.push("hello");
+        drle.push("hello");
+        drle.push_additional("hello", 1);
+
+        assert_eq!(
+            drle.values(),
+            ["hello", "hello", "world", "hello", "hello", "hello",]
+        );
+
+        drle.push_additional("zoo", 3);
+        assert_eq!(
+            drle.values(),
+            ["hello", "hello", "world", "hello", "hello", "hello", "zoo", "zoo", "zoo"]
+        );
+
+        assert_eq!(drle.value(0).unwrap(), "hello");
+        assert_eq!(drle.value(1).unwrap(), "hello");
+        assert_eq!(drle.value(2).unwrap(), "world");
+        assert_eq!(drle.value(3).unwrap(), "hello");
+        assert_eq!(drle.value(4).unwrap(), "hello");
+        assert_eq!(drle.value(5).unwrap(), "hello");
+        assert_eq!(drle.value(6).unwrap(), "zoo");
+        assert_eq!(drle.value(7).unwrap(), "zoo");
+        assert_eq!(drle.value(8).unwrap(), "zoo");
+    }
+
+    #[test]
+    fn row_ids() {
+        let mut drle = super::DictionaryRLE::new();
+        drle.push_additional("abc", 3);
+        drle.push_additional("dre", 2);
+        drle.push("abc");
+
+        let ids = drle.row_ids("abc").collect::<Vec<usize>>();
+        assert_eq!(ids, vec![0, 1, 2, 5]);
+
+        let ids = drle.row_ids("dre").collect::<Vec<usize>>();
+        assert_eq!(ids, vec![3, 4]);
+
+        let ids = drle.row_ids("foo").collect::<Vec<usize>>();
+        assert_eq!(ids, vec![]);
+    }
+
+    #[test]
+    fn row_ids_roaring() {
+        let mut drle = super::DictionaryRLE::new();
+        drle.push_additional("abc", 3);
+        drle.push_additional("dre", 2);
+        drle.push("abc");
+
+        let ids = drle.row_ids_roaring("abc").iter().collect::<Vec<u32>>();
+        assert_eq!(ids, vec![0, 1, 2, 5]);
+
+        let ids = drle.row_ids_roaring("dre").iter().collect::<Vec<u32>>();
+        assert_eq!(ids, vec![3, 4]);
+
+        let ids = drle.row_ids_roaring("foo").iter().collect::<Vec<u32>>();
+        assert_eq!(ids, vec![]);
+    }
+}
diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs
new file mode 100644
index 0000000000..8c24ab7c7c
--- /dev/null
+++ b/delorean_mem_qe/src/lib.rs
@@ -0,0 +1,28 @@
+pub mod column;
+pub mod encoding;
+pub mod segment;
+
+use segment::Segment;
+
+#[derive(Debug, Default)]
+pub struct Store {
+    segments: Vec<Segment>,
+
+    store_size: usize,
+}
+
+impl Store {
+    pub fn add_segment(&mut self, segment: Segment) {
+        self.store_size += segment.size();
+        self.segments.push(segment);
+    }
+
+    /// The total size of all segments in the store.
+    pub fn size(&self) -> usize {
+        self.store_size
+    }
+
+    pub fn segment_total(&self) -> usize {
+        self.segments.len()
+    }
+}
diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs
new file mode 100644
index 0000000000..873364de67
--- /dev/null
+++ b/delorean_mem_qe/src/segment.rs
@@ -0,0 +1,95 @@
+use std::collections::BTreeMap;
+
+use super::column;
+use super::column::Column;
+
+#[derive(Debug, Default)]
+pub struct Segment {
+    meta: SegmentMetaData,
+
+    // Columns within a segment
+    columns: Vec<column::Column>,
+    // string_columns: Vec,
+    // f64_columns: Vec,
+}
+
+impl Segment {
+    pub fn new(rows: usize) -> Self {
+        let mut segment = Self::default();
+        segment.meta.rows = rows;
+        segment
+    }
+
+    pub fn num_rows(&self) -> usize {
+        self.meta.rows
+    }
+
+    pub fn column_names(&self) -> Vec<String> {
+        self.meta.column_names.clone()
+    }
+
+    pub fn time_range(&self) -> (i64, i64) {
+        self.meta.time_range
+    }
+
+    pub fn add_column(&mut self, name: &str, c: column::Column) {
+        // TODO(edd) yuk
+        if name == "time" {
+            if let column::Column::Integer(ts) = &c {
+                self.meta.time_range = ts.column_range();
+            } else {
+                panic!("incorrect column type for time");
+            }
+        }
+        self.meta.rows = c.num_rows();
+
+        // validate column doesn't already exist in segment
+        assert!(!self.meta.column_names.contains(&name.to_owned()));
+        self.meta.column_names.push(name.to_owned());
+        self.columns.push(c);
+    }
+
+    // TODO - iterator....
+    pub fn size(&self) -> usize {
+        let mut size = 0;
+        for c in &self.columns {
+            size += c.size();
+        }
+        size
+    }
+
+    // Returns the size of each of the segment's columns in bytes.
+    pub fn column_sizes(&self) -> BTreeMap<String, usize> {
+        let mut column_sizes = BTreeMap::new();
+        let names = self.column_names();
+        for (i, column) in self.columns.iter().enumerate() {
+            match column {
+                Column::String(c) => {
+                    column_sizes.insert(names[i].clone(), c.size());
+                }
+                Column::Float(c) => {
+                    column_sizes.insert(names[i].clone(), c.size());
+                }
+                Column::Integer(c) => {
+                    column_sizes.insert(names[i].clone(), c.size());
+                }
+            }
+        }
+        column_sizes
+    }
+}
+
+/// Meta data for a segment. This data is mainly used to determine if a segment
+/// may contain values for answering a query.
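+//
+// For example, a query over time range [from, to] can skip a whole segment
+// when the ranges are disjoint; a sketch of the check (an
+// `overlaps_time_range` helper along these lines is added in a later patch):
+//
+//     meta.time_range.0 <= to && from <= meta.time_range.1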
+#[derive(Debug, Default)] +pub struct SegmentMetaData { + size: usize, // TODO + rows: usize, + + column_names: Vec, + time_range: (i64, i64), + // TODO column sort order +} + +#[cfg(test)] +mod test {} From 8670af5d30504767393389f2fc100fc7560f544d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 17:07:40 +0100 Subject: [PATCH 05/73] feat: column min --- delorean_mem_qe/src/bin/main.rs | 45 +++++++++++++++++++++++++++------ delorean_mem_qe/src/column.rs | 15 +++++++++++ delorean_mem_qe/src/lib.rs | 6 ++++- delorean_mem_qe/src/segment.rs | 43 ++++++++++++++++++++++++++----- 4 files changed, 93 insertions(+), 16 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index c7bdc932b3..9c5564ea69 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -4,7 +4,7 @@ use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; -use delorean_mem_qe::column::Column; +use delorean_mem_qe::column::{Column, Scalar}; use delorean_mem_qe::segment::Segment; use delorean_mem_qe::Store; @@ -24,26 +24,52 @@ pub enum Error { } fn main() { + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + let mut store = Store::default(); - read_arrow_file(&mut store); + build_store(reader, &mut store).unwrap(); println!( "total segments {:?} with total size {:?}", store.segment_total(), store.size(), ); + + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_min = 0; + for _ in 1..10000 { + let now = std::time::Instant::now(); + let segments = store.segments(); + let min = segments.column_min("time").unwrap(); + total_time += now.elapsed(); + + if let Scalar::Integer(v) = min { + total_min += v + } + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + 10000, + total_time, + total_time / 10000, + total_min + ); + // println!("{:?} min -> {:?} in {:?}", "time", min, elapsed); } -fn read_arrow_file(store: &mut Store) { - let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - let mut reader = ipc::reader::StreamReader::try_new(r).unwrap(); - while let Some(batch) = reader.next_batch().unwrap() { - let segment = record_batch_to_segment(&batch).unwrap(); +fn build_store( + mut reader: arrow::ipc::reader::StreamReader, + store: &mut Store, +) -> Result<(), Error> { + while let Some(rb) = reader.next_batch().unwrap() { + let segment = convert_record_batch(rb)?; store.add_segment(segment); } + Ok(()) } -fn record_batch_to_segment(rb: &RecordBatch) -> Result { +fn convert_record_batch(rb: RecordBatch) -> Result { let mut segment = Segment::default(); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); @@ -54,11 +80,13 @@ fn record_batch_to_segment(rb: &RecordBatch) -> Result { .as_any() .downcast_ref::() .unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { let arr = column.as_any().downcast_ref::().unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } @@ -81,6 +109,7 @@ fn record_batch_to_segment(rb: &RecordBatch) -> Result { count = 1; } } + 
segment.add_column(rb.schema().field(i).name(), Column::String(column)); } datatypes::DataType::Boolean => { diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 188d0dd193..81627dcd11 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -2,6 +2,13 @@ use std::convert::From; use super::encoding; +#[derive(Debug, PartialEq, PartialOrd)] +pub enum Scalar<'a> { + String(&'a str), + Float(f64), + Integer(i64), +} + #[derive(Debug)] pub enum Column { String(String), @@ -27,6 +34,14 @@ impl Column { Column::Integer(c) => c.size(), } } + + pub fn min(&self) -> Scalar { + match self { + Column::String(c) => Scalar::String(c.meta.range().0), + Column::Float(c) => Scalar::Float(c.meta.range().0), + Column::Integer(c) => Scalar::Integer(c.meta.range().0), + } + } } impl From<&[f64]> for Column { diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 8c24ab7c7c..65cc7f21f4 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -2,7 +2,7 @@ pub mod column; pub mod encoding; pub mod segment; -use segment::Segment; +use segment::{Segment, Segments}; #[derive(Debug, Default)] pub struct Store { @@ -25,4 +25,8 @@ impl Store { pub fn segment_total(&self) -> usize { self.segments.len() } + + pub fn segments(&self) -> Segments { + Segments::new(&self.segments) + } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 873364de67..d12dcfe14d 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -9,8 +9,6 @@ pub struct Segment { // Columns within a segment columns: Vec, - // string_columns: Vec, - // f64_columns: Vec, } impl Segment { @@ -24,8 +22,8 @@ impl Segment { self.meta.rows } - pub fn column_names(&self) -> Vec { - self.meta.column_names.clone() + pub fn column_names(&self) -> &[String] { + &self.meta.column_names } pub fn time_range(&self) -> (i64, i64) { @@ -65,13 +63,13 @@ impl Segment { for (i, column) in self.columns.iter().enumerate() { match column { Column::String(c) => { - column_sizes.insert(names[i].clone(), c.size()); + column_sizes.insert(names[i].to_owned(), c.size()); } Column::Float(c) => { - column_sizes.insert(names[i].clone(), c.size()); + column_sizes.insert(names[i].to_owned(), c.size()); } Column::Integer(c) => { - column_sizes.insert(names[i].clone(), c.size()); + column_sizes.insert(names[i].to_owned(), c.size()); } } } @@ -79,6 +77,37 @@ impl Segment { } } +pub struct Segments<'a> { + segments: &'a [Segment], +} + +impl<'a> Segments<'a> { + pub fn new(segments: &'a [Segment]) -> Self { + Self { segments } + } + + /// Returns the minimum value for a column in a set of segments. + pub fn column_min(&self, column_name: &str) -> Option { + if self.segments.is_empty() { + return None; + } + + let mut min_min: Option = None; + for segment in self.segments { + if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { + let min = Some(segment.columns[i].min()); + if min_min.is_none() { + min_min = min + } else if min_min > min { + min_min = min; + } + } + } + + min_min + } +} + /// Meta data for a segment. This data is mainly used to determine if a segment /// may contain value for answering a query. 
#[derive(Debug, Default)] From 527083f7a0657939f517e9eac69231f12c3da359 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 18:32:16 +0100 Subject: [PATCH 06/73] feat: column max and column first --- delorean_mem_qe/src/bin/main.rs | 89 +++++++++++++++++++++++++-------- delorean_mem_qe/src/column.rs | 58 +++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 68 ++++++++++++++++--------- delorean_mem_qe/src/segment.rs | 64 ++++++++++++++++++++++++ 4 files changed, 236 insertions(+), 43 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9c5564ea69..9eb9b584fb 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -36,26 +36,9 @@ fn main() { store.size(), ); - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_min = 0; - for _ in 1..10000 { - let now = std::time::Instant::now(); - let segments = store.segments(); - let min = segments.column_min("time").unwrap(); - total_time += now.elapsed(); - - if let Scalar::Integer(v) = min { - total_min += v - } - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - 10000, - total_time, - total_time / 10000, - total_min - ); - // println!("{:?} min -> {:?} in {:?}", "time", min, elapsed); + // time_column_min_time(&store); + // time_column_max_time(&store); + time_column_first(&store); } fn build_store( @@ -120,3 +103,69 @@ fn convert_record_batch(rb: RecordBatch) -> Result { } Ok(segment) } + +fn time_column_min_time(store: &Store) { + let repeat = 1000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_min = 0; + for _ in 1..repeat { + let now = std::time::Instant::now(); + let segments = store.segments(); + let min = segments.column_min("time").unwrap(); + total_time += now.elapsed(); + + if let Scalar::Integer(v) = min { + total_min += v + } + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_min + ); +} + +fn time_column_max_time(store: &Store) { + let repeat = 1000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + for _ in 1..repeat { + let now = std::time::Instant::now(); + let segments = store.segments(); + let max = segments.column_max("time").unwrap(); + total_time += now.elapsed(); + + if let Scalar::Integer(v) = max { + total_max += v + } + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} + +fn time_column_first(store: &Store) { + let repeat = 100000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + for _ in 1..repeat { + let now = std::time::Instant::now(); + let segments = store.segments(); + let res = segments.first("host").unwrap(); + total_time += now.elapsed(); + total_max += res.0; + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 81627dcd11..e2233e06cc 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -35,6 +35,32 @@ impl Column { } } + pub fn value(&self, row_id: usize) -> Option { + match self { + Column::String(c) => { + if row_id >= self.num_rows() { + return None; + } + if let Some(v) = c.value(row_id) { + return Some(Scalar::String(v)); + }; + None + } + Column::Float(c) => { + if row_id >= self.num_rows() { + return None; + } + 
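                // Float and Integer data is held in dense `PlainFixed`
                // vectors with no null slots, so a bounds-checked row id
                // always has a value; only the String arm above can miss.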
Some(Scalar::Float(c.value(row_id))) + } + Column::Integer(c) => { + if row_id >= self.num_rows() { + return None; + } + Some(Scalar::Integer(c.value(row_id))) + } + } + } + pub fn min(&self) -> Scalar { match self { Column::String(c) => Scalar::String(c.meta.range().0), @@ -42,6 +68,14 @@ impl Column { Column::Integer(c) => Scalar::Integer(c.meta.range().0), } } + + pub fn max(&self) -> Scalar { + match self { + Column::String(c) => Scalar::String(c.meta.range().1), + Column::Float(c) => Scalar::Float(c.meta.range().1), + Column::Integer(c) => Scalar::Integer(c.meta.range().1), + } + } } impl From<&[f64]> for Column { @@ -82,6 +116,10 @@ impl String { pub fn size(&self) -> usize { self.meta.size() + self.data.size() } + + pub fn value(&self, row_id: usize) -> Option<&std::string::String> { + self.data.value(row_id) + } } #[derive(Debug, Default)] @@ -100,6 +138,10 @@ impl Float { pub fn size(&self) -> usize { self.meta.size() + self.data.size() } + + pub fn value(&self, row_id: usize) -> f64 { + self.data.value(row_id) + } } impl From<&[f64]> for Float { @@ -137,6 +179,18 @@ impl Integer { pub fn size(&self) -> usize { self.meta.size() + self.data.size() } + + pub fn value(&self, row_id: usize) -> i64 { + self.data.value(row_id) + } + + /// Find the first logical row that contains this value. + pub fn row_id_for_value(&self, v: i64) -> Option { + if !self.meta.maybe_contains_value(v) { + return None; + } + self.data.row_id_for_value(v) + } } impl From<&[i64]> for Integer { @@ -233,6 +287,10 @@ pub mod metadata { } } + pub fn maybe_contains_value(&self, v: i64) -> bool { + self.range.0 <= v && v <= self.range.1 + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index b760771cdb..e06dac8b3d 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -18,30 +18,53 @@ impl PlainFixedOption { // No compression pub struct PlainFixed { values: Vec, + total_order: bool, // if true the column is totally ordered ascending. } -impl PlainFixed { +impl PlainFixed +where + T: PartialEq + Copy, +{ pub fn size(&self) -> usize { self.values.len() * std::mem::size_of::() } + + pub fn row_id_for_value(&self, v: T) -> Option { + self.values.iter().position(|x| *x == v) + } + + // get value at row_id. Panics if out of bounds. + pub fn value(&self, row_id: usize) -> T { + self.values[row_id] + } } impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { - Self { values: v.to_vec() } + Self { + values: v.to_vec(), + total_order: false, + } } } impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { - Self { values: v.to_vec() } + Self { + values: v.to_vec(), + total_order: false, + } } } #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. - map: BTreeMap, + entry_index: BTreeMap, + + // stores the mapping between an index and its entry. 
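+    // Keeping both directions roughly doubles the dictionary footprint
+    // (`size()` below counts `map_size` twice), in exchange for `value()`
+    // lookups that no longer rebuild a reverse map on every call.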
+ index_entry: BTreeMap, + map_size: usize, // TODO(edd) this isn't perfect at all // stores tuples where each pair refers to a dictionary entry and the number @@ -55,7 +78,8 @@ pub struct DictionaryRLE { impl DictionaryRLE { pub fn new() -> Self { Self { - map: BTreeMap::new(), + entry_index: BTreeMap::new(), + index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), run_length_size: 0, @@ -69,7 +93,7 @@ impl DictionaryRLE { pub fn push_additional(&mut self, v: &str, additional: u64) { self.total += additional; - let idx = self.map.get(v); + let idx = self.entry_index.get(v); match idx { Some(idx) => { if let Some((last_idx, rl)) = self.run_lengths.last_mut() { @@ -86,9 +110,10 @@ impl DictionaryRLE { None => { // New dictionary entry. if idx.is_none() { - let idx = self.map.len(); + let idx = self.entry_index.len(); - self.map.insert(String::from(v), idx); + self.entry_index.insert(String::from(v), idx); + self.index_entry.insert(idx, String::from(v)); self.map_size += v.len() + std::mem::size_of::(); self.run_lengths.push((idx, additional)); @@ -103,7 +128,7 @@ impl DictionaryRLE { // value. pub fn row_ids(&self, value: &str) -> impl iter::Iterator { let mut out: Vec = vec![]; - if let Some(idx) = self.map.get(value) { + if let Some(idx) = self.entry_index.get(value) { let mut index: usize = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -120,7 +145,7 @@ impl DictionaryRLE { // value. pub fn row_ids_roaring(&self, value: &str) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); - if let Some(idx) = self.map.get(value) { + if let Some(idx) = self.entry_index.get(value) { let mut index: u64 = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -155,24 +180,20 @@ impl DictionaryRLE { // } pub fn dictionary(&self) -> BTreeSet { - self.map.keys().cloned().collect::>() + self.entry_index + .keys() + .cloned() + .collect::>() } // get the logical value at the provided index, or None if there is no value // at index. - pub fn value(&self, index: usize) -> Option<&str> { + pub fn value(&self, index: usize) -> Option<&String> { if index < self.total as usize { - // build reverse mapping. - let mut idx_value = BTreeMap::new(); - for (k, v) in &self.map { - idx_value.insert(v, k.as_str()); - } - assert_eq!(idx_value.len(), self.map.len()); - let mut total = 0; for (idx, rl) in &self.run_lengths { if total + rl > index as u64 { - return idx_value.get(idx).cloned(); + return self.index_entry.get(idx); } total += rl; } @@ -187,10 +208,10 @@ impl DictionaryRLE { // build reverse mapping. 
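        // note: unlike `value()` above, this still rebuilds the reverse
        // mapping on every call rather than reading `index_entry`.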
let mut idx_value = BTreeMap::new(); - for (k, v) in &self.map { + for (k, v) in &self.entry_index { idx_value.insert(v, k.as_str()); } - assert_eq!(idx_value.len(), self.map.len()); + assert_eq!(idx_value.len(), self.entry_index.len()); for (idx, rl) in &self.run_lengths { let &v = idx_value.get(&idx).unwrap(); @@ -200,7 +221,8 @@ impl DictionaryRLE { } pub fn size(&self) -> usize { - self.map_size + self.run_length_size + // mapping and reverse mapping then the rles + 2 * self.map_size + self.run_length_size } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index d12dcfe14d..e1ccf413f0 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -9,6 +9,7 @@ pub struct Segment { // Columns within a segment columns: Vec, + time_column_idx: usize, } impl Segment { @@ -26,6 +27,14 @@ impl Segment { &self.meta.column_names } + /// column returns the column with name + pub fn column(&self, name: &str) -> Option<&column::Column> { + if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { + return self.columns.get(*id); + } + None + } + pub fn time_range(&self) -> (i64, i64) { self.meta.time_range } @@ -38,6 +47,7 @@ impl Segment { } else { panic!("incorrect column type for time"); } + self.time_column_idx = self.columns.len(); } self.meta.rows = c.num_rows(); @@ -106,6 +116,60 @@ impl<'a> Segments<'a> { min_min } + + /// Returns the maximum value for a column in a set of segments. + pub fn column_max(&self, column_name: &str) -> Option { + if self.segments.is_empty() { + return None; + } + + let mut max_max: Option = None; + for segment in self.segments { + if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { + let max = Some(segment.columns[i].max()); + if max_max.is_none() { + max_max = max + } else if max_max < max { + max_max = max; + } + } + } + + max_max + } + + /// Returns the first value for a column in a set of segments. + /// + /// TODO(edd): could return NULL value.. + pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + if self.segments.is_empty() { + return None; + } + + let mut first_first: Option<(i64, Option)> = None; + for segment in self.segments { + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwr + let min_ts = ts_col.column_range().0; + let min_ts_id = ts_col.row_id_for_value(min_ts).unwrap(); + + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(min_ts_id); + + match &first_first { + Some(prev) => { + if prev.0 > min_ts { + first_first = Some((min_ts, value)); + } + } + None => first_first = Some((min_ts, value)), + } + } + } + + first_first + } } /// Meta data for a segment. 
This data is mainly used to determine if a segment From a5384d1771644e63848c8470709d7d6500fb6e2c Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 20:15:59 +0100 Subject: [PATCH 07/73] feat: column last and filter by time --- delorean_mem_qe/src/bin/main.rs | 11 ++++-- delorean_mem_qe/src/lib.rs | 2 +- delorean_mem_qe/src/segment.rs | 64 ++++++++++++++++++++++++++++++--- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9eb9b584fb..4a00d0b181 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -38,7 +38,14 @@ fn main() { // time_column_min_time(&store); // time_column_max_time(&store); - time_column_first(&store); + // time_column_first(&store); + let segments = store.segments(); + let res = segments.last("host").unwrap(); + println!("{:?}", res); + + let segments = segments.filter_by_time(1590036110000000, 1590044410000000); + let res = segments.first("env").unwrap(); + println!("{:?}", res); } fn build_store( @@ -52,7 +59,7 @@ fn build_store( Ok(()) } -fn convert_record_batch(rb: RecordBatch) -> Result { +fn convert_record_batch<'a>(rb: RecordBatch) -> Result { let mut segment = Segment::default(); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 65cc7f21f4..26b78d9963 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -27,6 +27,6 @@ impl Store { } pub fn segments(&self) -> Segments { - Segments::new(&self.segments) + Segments::new(self.segments.iter().collect::>()) } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index e1ccf413f0..6804acb2d0 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -88,14 +88,24 @@ impl Segment { } pub struct Segments<'a> { - segments: &'a [Segment], + segments: Vec<&'a Segment>, } impl<'a> Segments<'a> { - pub fn new(segments: &'a [Segment]) -> Self { + pub fn new(segments: Vec<&'a Segment>) -> Self { Self { segments } } + pub fn filter_by_time(&self, min: i64, max: i64) -> Segments<'a> { + let mut segments: Vec<&Segment> = vec![]; + for segment in &self.segments { + if segment.meta.overlaps_time_range(min, max) { + segments.push(segment); + } + } + Self::new(segments) + } + /// Returns the minimum value for a column in a set of segments. pub fn column_min(&self, column_name: &str) -> Option { if self.segments.is_empty() { @@ -103,7 +113,7 @@ impl<'a> Segments<'a> { } let mut min_min: Option = None; - for segment in self.segments { + for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { let min = Some(segment.columns[i].min()); if min_min.is_none() { @@ -124,7 +134,7 @@ impl<'a> Segments<'a> { } let mut max_max: Option = None; - for segment in self.segments { + for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { let max = Some(segment.columns[i].max()); if max_max.is_none() { @@ -147,7 +157,7 @@ impl<'a> Segments<'a> { } let mut first_first: Option<(i64, Option)> = None; - for segment in self.segments { + for segment in &self.segments { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwr @@ -170,6 +180,39 @@ impl<'a> Segments<'a> { first_first } + + /// Returns the last value for a column in a set of segments. 
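+ ///
+ /// Each segment is scanned for the row holding that segment's maximum
+ /// timestamp, and the value whose timestamp is greatest across all
+ /// segments wins.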
+ /// + /// TODO(edd): could return NULL value.. + pub fn last(&self, column_name: &str) -> Option<(i64, Option)> { + if self.segments.is_empty() { + return None; + } + + let mut last_last: Option<(i64, Option)> = None; + for segment in &self.segments { + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwr + let max_ts = ts_col.column_range().1; + let max_ts_id = ts_col.row_id_for_value(max_ts).unwrap(); + + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(max_ts_id); + + match &last_last { + Some(prev) => { + if prev.0 < max_ts { + last_last = Some((max_ts, value)); + } + } + None => last_last = Some((max_ts, value)), + } + } + } + + last_last + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -184,5 +227,16 @@ pub struct SegmentMetaData { // TODO column sort order } +impl SegmentMetaData { + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { + let result = self.time_range.0 <= to && from <= self.time_range.1; + println!( + "segment with ({:?}) overlaps ({:?}, {:?}) -- {:?}", + self.time_range, from, to, result + ); + result + } +} + #[cfg(test)] mod test {} From 69bc0424bf13d733e90fdedfa2c5bb1690f849c1 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 21:17:49 +0100 Subject: [PATCH 08/73] feat: add filter by tag --- delorean_mem_qe/src/bin/main.rs | 19 +++++++++++--- delorean_mem_qe/src/column.rs | 44 +++++++++++++++++++++++++++++++++ delorean_mem_qe/src/segment.rs | 16 ++++++++++++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 4a00d0b181..82e5adc4e4 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -43,7 +43,9 @@ fn main() { let res = segments.last("host").unwrap(); println!("{:?}", res); - let segments = segments.filter_by_time(1590036110000000, 1590044410000000); + let segments = segments + .filter_by_time(1590036110000000, 1590044410000000) + .filter_by_predicate_eq("env", &column::Scalar::String("toolsus1")); let res = segments.first("env").unwrap(); println!("{:?}", res); } @@ -66,6 +68,9 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { for (i, column) in rb.columns().iter().enumerate() { match *column.data_type() { datatypes::DataType::Float64 => { + if column.null_count() > 0 { + panic!("null floats"); + } let arr = column .as_any() .downcast_ref::() @@ -75,18 +80,24 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { + if column.null_count() > 0 { + panic!("null times"); + } let arr = column.as_any().downcast_ref::().unwrap(); let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { + if column.null_count() > 0 { + panic!("null tag"); + } let arr = column .as_any() .downcast_ref::() .unwrap(); - let mut column = column::String::default(); + let mut c = column::String::default(); let mut prev = arr.value(0); let mut count = 1_u64; for j in 1..arr.len() { @@ -94,13 +105,13 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { if prev == next { count += 1; } else { - column.add_additional(prev, count); + c.add_additional(prev, count); prev = next; count = 1; } } - 
segment.add_column(rb.schema().field(i).name(), Column::String(column)); + segment.add_column(rb.schema().field(i).name(), Column::String(c)); } datatypes::DataType::Boolean => { panic!("unsupported"); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index e2233e06cc..3676e13c2d 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -61,6 +61,32 @@ impl Column { } } + pub fn maybe_contains(&self, value: &Scalar) -> bool { + match self { + Column::String(c) => { + if let Scalar::String(v) = value { + c.meta.maybe_contains_value(v.to_string()) + } else { + panic!("invalid value"); + } + } + Column::Float(c) => { + if let Scalar::Float(v) = value { + c.meta.maybe_contains_value(v.to_owned()) + } else { + panic!("invalid value"); + } + } + Column::Integer(c) => { + if let Scalar::Integer(v) = value { + c.meta.maybe_contains_value(v.to_owned()) + } else { + panic!("invalid value"); + } + } + } + } + pub fn min(&self) -> Scalar { match self { Column::String(c) => Scalar::String(c.meta.range().0), @@ -237,6 +263,15 @@ pub mod metadata { self.num_rows } + pub fn maybe_contains_value(&self, v: String) -> bool { + let res = self.range.0 <= v && v <= self.range.1; + println!( + "column with ({:?}) maybe contain {:?} -- {:?}", + self.range, v, res + ); + res + } + pub fn range(&self) -> (&str, &str) { (&self.range.0, &self.range.1) } @@ -260,6 +295,15 @@ pub mod metadata { } } + pub fn maybe_contains_value(&self, v: f64) -> bool { + let res = self.range.0 <= v && v <= self.range.1; + println!( + "column with ({:?}) maybe contain {:?} -- {:?}", + self.range, v, res + ); + res + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 6804acb2d0..1f1084c0e8 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -106,6 +106,22 @@ impl<'a> Segments<'a> { Self::new(segments) } + pub fn filter_by_predicate_eq( + &self, + column_name: &str, + value: &column::Scalar, + ) -> Segments<'a> { + let mut segments: Vec<&Segment> = vec![]; + for segment in &self.segments { + if let Some(col) = segment.column(column_name) { + if col.maybe_contains(&value) { + segments.push(segment); + } + } + } + Self::new(segments) + } + /// Returns the minimum value for a column in a set of segments. 
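// NOTE(editor): a minimal sketch of the pruning rule this patch adds via
// `Column::maybe_contains`, assuming hypothetical string metadata whose
// recorded range is ("aaa", "mmm"):
//
//     "foo": "aaa" <= "foo" && "foo" <= "mmm"  -> true, segment kept
//     "zzz": "zzz" <= "mmm" fails              -> false, segment skipped
//
// true only means the value may be present (false positives are resolved
// by scanning); false proves absence, so the segment is skipped unscanned.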
pub fn column_min(&self, column_name: &str) -> Option { if self.segments.is_empty() { From d43d7bb3d4763bd4b1e0a1731856801f7c1e821b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 22:42:26 +0100 Subject: [PATCH 09/73] feat: nullable tags --- delorean_mem_qe/src/bin/main.rs | 26 +++++--- delorean_mem_qe/src/column.rs | 70 ++++++++++++-------- delorean_mem_qe/src/encoding.rs | 113 ++++++++++++++++++++++---------- delorean_mem_qe/src/segment.rs | 46 ++++++------- 4 files changed, 159 insertions(+), 96 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 82e5adc4e4..ae104d35a8 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -45,8 +45,8 @@ fn main() { let segments = segments .filter_by_time(1590036110000000, 1590044410000000) - .filter_by_predicate_eq("env", &column::Scalar::String("toolsus1")); - let res = segments.first("env").unwrap(); + .filter_by_predicate_eq("env", &column::Scalar::String("prod01-eu-central-1")); + let res = segments.first("env"); println!("{:?}", res); } @@ -61,7 +61,7 @@ fn build_store( Ok(()) } -fn convert_record_batch<'a>(rb: RecordBatch) -> Result { +fn convert_record_batch(rb: RecordBatch) -> Result { let mut segment = Segment::default(); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); @@ -89,23 +89,31 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { - if column.null_count() > 0 { - panic!("null tag"); - } let arr = column .as_any() .downcast_ref::() .unwrap(); let mut c = column::String::default(); - let mut prev = arr.value(0); + let mut prev: Option<&str> = None; + if !column.is_null(0) { + prev = Some(arr.value(0)); + } + let mut count = 1_u64; for j in 1..arr.len() { - let next = arr.value(j); + let mut next = Some(arr.value(j)); + if column.is_null(j) { + next = None; + } + if prev == next { count += 1; } else { - c.add_additional(prev, count); + match prev { + Some(x) => c.add_additional(Some(x.to_string()), count), + None => c.add_additional(None, count), + } prev = next; count = 1; } diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 3676e13c2d..4597cf80c5 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -41,10 +41,11 @@ impl Column { if row_id >= self.num_rows() { return None; } - if let Some(v) = c.value(row_id) { - return Some(Scalar::String(v)); - }; - None + + match c.value(row_id) { + Some(v) => Some(Scalar::String(v)), + None => None, + } } Column::Float(c) => { if row_id >= self.num_rows() { @@ -65,7 +66,7 @@ impl Column { match self { Column::String(c) => { if let Scalar::String(v) = value { - c.meta.maybe_contains_value(v.to_string()) + c.meta.maybe_contains_value(&v.to_string()) } else { panic!("invalid value"); } @@ -87,19 +88,31 @@ impl Column { } } - pub fn min(&self) -> Scalar { + // FIXME(edd): Support NULL integers and floats + pub fn min(&self) -> Option { match self { - Column::String(c) => Scalar::String(c.meta.range().0), - Column::Float(c) => Scalar::Float(c.meta.range().0), - Column::Integer(c) => Scalar::Integer(c.meta.range().0), + Column::String(c) => { + if let Some(min) = c.meta.range().0 { + return Some(Scalar::String(min)); + } + None + } + Column::Float(c) => Some(Scalar::Float(c.meta.range().0)), + Column::Integer(c) => Some(Scalar::Integer(c.meta.range().0)), } } - pub fn max(&self) -> Scalar { + // FIXME(edd): Support NULL integers and floats 
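+ // NOTE(editor): with the Option-based range, Rust's derived ordering
+ // sorts None below any Some(_), so a column containing a NULL naturally
+ // reports None as its minimum. Note, though, that the metadata's default
+ // lower bound is also None and nothing ever compares below it, so as
+ // written min() appears to report None even for NULL-free columns.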
+ pub fn max(&self) -> Option { match self { - Column::String(c) => Scalar::String(c.meta.range().1), - Column::Float(c) => Scalar::Float(c.meta.range().1), - Column::Integer(c) => Scalar::Integer(c.meta.range().1), + Column::String(c) => { + if let Some(max) = c.meta.range().1 { + return Some(Scalar::String(max)); + } + None + } + Column::Float(c) => Some(Scalar::Float(c.meta.range().1)), + Column::Integer(c) => Some(Scalar::Integer(c.meta.range().1)), } } } @@ -126,16 +139,16 @@ pub struct String { impl String { pub fn add(&mut self, s: &str) { - self.meta.add(s); + self.meta.add(Some(s.to_string())); self.data.push(s); } - pub fn add_additional(&mut self, s: &str, additional: u64) { - self.meta.add(s); + pub fn add_additional(&mut self, s: Option, additional: u64) { + self.meta.add(s.clone()); self.data.push_additional(s, additional); } - pub fn column_range(&self) -> (&str, &str) { + pub fn column_range(&self) -> (Option<&std::string::String>, Option<&std::string::String>) { self.meta.range() } @@ -241,21 +254,21 @@ impl From<&[i64]> for Integer { pub mod metadata { #[derive(Debug, Default)] pub struct Str { - range: (String, String), + range: (Option, Option), num_rows: usize, // sparse_index: BTreeMap, } impl Str { - pub fn add(&mut self, s: &str) { + pub fn add(&mut self, s: Option) { self.num_rows += 1; - if self.range.0.as_str() > s { - self.range.0 = s.to_owned(); + if self.range.0 > s { + self.range.0 = s.clone(); } - if self.range.1.as_str() < s { - self.range.1 = s.to_owned(); + if self.range.1 < s { + self.range.1 = s; } } @@ -263,8 +276,8 @@ pub mod metadata { self.num_rows } - pub fn maybe_contains_value(&self, v: String) -> bool { - let res = self.range.0 <= v && v <= self.range.1; + pub fn maybe_contains_value(&self, v: &str) -> bool { + let res = self.range.0 <= Some(v.to_string()) && Some(v.to_string()) <= self.range.1; println!( "column with ({:?}) maybe contain {:?} -- {:?}", self.range, v, res @@ -272,12 +285,13 @@ pub mod metadata { res } - pub fn range(&self) -> (&str, &str) { - (&self.range.0, &self.range.1) + pub fn range(&self) -> (Option<&String>, Option<&String>) { + (self.range.0.as_ref(), self.range.1.as_ref()) } pub fn size(&self) -> usize { - self.range.0.len() + self.range.1.len() + std::mem::size_of::() + // TODO!!!! + 0 //self.range.0.len() + self.range.1.len() + std::mem::size_of::() } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e06dac8b3d..283398223f 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -60,10 +60,10 @@ impl From<&[f64]> for PlainFixed { #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. - entry_index: BTreeMap, + entry_index: BTreeMap, usize>, // stores the mapping between an index and its entry. 
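// NOTE(editor): a minimal usage sketch of the nullable dual-map layout
// (the values are hypothetical):
//
//     let mut drle = DictionaryRLE::new();
//     drle.push_additional(Some("west".to_string()), 2);
//     drle.push_none();
//     drle.push_additional(Some("east".to_string()), 1);
//     // entry_index: {Some("west"): 0, None: 1, Some("east"): 2}
//     // index_entry: {0: Some("west"), 1: None, 2: Some("east")}
//     // run_lengths: [(0, 2), (1, 1), (2, 1)]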
- index_entry: BTreeMap, + index_entry: BTreeMap>, map_size: usize, // TODO(edd) this isn't perfect at all @@ -88,12 +88,16 @@ impl DictionaryRLE { } pub fn push(&mut self, v: &str) { - self.push_additional(v, 1); + self.push_additional(Some(v.to_owned()), 1); } - pub fn push_additional(&mut self, v: &str, additional: u64) { + pub fn push_none(&mut self) { + self.push_additional(None, 1); + } + + pub fn push_additional(&mut self, v: Option, additional: u64) { self.total += additional; - let idx = self.entry_index.get(v); + let idx = self.entry_index.get(&v); match idx { Some(idx) => { if let Some((last_idx, rl)) = self.run_lengths.last_mut() { @@ -112,9 +116,12 @@ impl DictionaryRLE { if idx.is_none() { let idx = self.entry_index.len(); - self.entry_index.insert(String::from(v), idx); - self.index_entry.insert(idx, String::from(v)); - self.map_size += v.len() + std::mem::size_of::(); + self.entry_index.insert(v.clone(), idx); + if let Some(value) = &v { + self.map_size += value.len(); + } + self.index_entry.insert(idx, v); + self.map_size += 8 + std::mem::size_of::(); // TODO(edd): clean this option size up self.run_lengths.push((idx, additional)); self.run_length_size += std::mem::size_of::<(usize, u64)>(); @@ -126,9 +133,9 @@ impl DictionaryRLE { // row_ids returns an iterator over the set of row ids matching the provided // value. - pub fn row_ids(&self, value: &str) -> impl iter::Iterator { + pub fn row_ids(&self, value: Option) -> impl iter::Iterator { let mut out: Vec = vec![]; - if let Some(idx) = self.entry_index.get(value) { + if let Some(idx) = self.entry_index.get(&value) { let mut index: usize = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -143,9 +150,9 @@ impl DictionaryRLE { // row_ids returns an iterator over the set of row ids matching the provided // value. - pub fn row_ids_roaring(&self, value: &str) -> croaring::Bitmap { + pub fn row_ids_roaring(&self, value: Option) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); - if let Some(idx) = self.entry_index.get(value) { + if let Some(idx) = self.entry_index.get(&value) { let mut index: u64 = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -179,11 +186,11 @@ impl DictionaryRLE { // unreachable!("for now"); // } - pub fn dictionary(&self) -> BTreeSet { + pub fn dictionary(&self) -> BTreeSet> { self.entry_index .keys() .cloned() - .collect::>() + .collect::>>() } // get the logical value at the provided index, or None if there is no value @@ -193,7 +200,12 @@ impl DictionaryRLE { let mut total = 0; for (idx, rl) in &self.run_lengths { if total + rl > index as u64 { - return self.index_entry.get(idx); + // TODO(edd): Can this really be idiomatic??? + match self.index_entry.get(idx) { + Some(&Some(ref result)) => return Some(result), + Some(&None) => return None, + None => return None, + } } total += rl; } @@ -203,19 +215,20 @@ impl DictionaryRLE { // values materialises a vector of references to all logical values in the // encoding. - pub fn values(&mut self) -> Vec<&str> { - let mut out = Vec::with_capacity(self.total as usize); + pub fn values(&mut self) -> Vec> { + let mut out: Vec> = Vec::with_capacity(self.total as usize); // build reverse mapping. 
let mut idx_value = BTreeMap::new(); for (k, v) in &self.entry_index { - idx_value.insert(v, k.as_str()); + idx_value.insert(v, k); } assert_eq!(idx_value.len(), self.entry_index.len()); for (idx, rl) in &self.run_lengths { - let &v = idx_value.get(&idx).unwrap(); - out.extend(iter::repeat(&v).take(*rl as usize)); + // TODO(edd): fix unwrap - we know that the value exists in map... + let v = idx_value.get(&idx).unwrap().as_ref(); + out.extend(iter::repeat(v).take(*rl as usize)); } out } @@ -261,17 +274,34 @@ mod test { drle.push("world"); drle.push("hello"); drle.push("hello"); - drle.push_additional("hello", 1); + drle.push_additional(Some("hello".to_string()), 1); assert_eq!( drle.values(), - ["hello", "hello", "world", "hello", "hello", "hello",] + [ + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"world".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()) + ] ); - drle.push_additional("zoo", 3); + drle.push_additional(Some("zoo".to_string()), 3); assert_eq!( drle.values(), - ["hello", "hello", "world", "hello", "hello", "hello", "zoo", "zoo", "zoo"] + [ + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"world".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"zoo".to_string()), + Some(&"zoo".to_string()), + Some(&"zoo".to_string()), + ] ); assert_eq!(drle.value(0).unwrap(), "hello"); @@ -288,34 +318,49 @@ mod test { #[test] fn row_ids() { let mut drle = super::DictionaryRLE::new(); - drle.push_additional("abc", 3); - drle.push_additional("dre", 2); + drle.push_additional(Some("abc".to_string()), 3); + drle.push_additional(Some("dre".to_string()), 2); drle.push("abc"); - let ids = drle.row_ids("abc").collect::>(); + let ids = drle + .row_ids(Some("abc".to_string())) + .collect::>(); assert_eq!(ids, vec![0, 1, 2, 5]); - let ids = drle.row_ids("dre").collect::>(); + let ids = drle + .row_ids(Some("dre".to_string())) + .collect::>(); assert_eq!(ids, vec![3, 4]); - let ids = drle.row_ids("foo").collect::>(); + let ids = drle + .row_ids(Some("foo".to_string())) + .collect::>(); assert_eq!(ids, vec![]); } #[test] fn row_ids_roaring() { let mut drle = super::DictionaryRLE::new(); - drle.push_additional("abc", 3); - drle.push_additional("dre", 2); + drle.push_additional(Some("abc".to_string()), 3); + drle.push_additional(Some("dre".to_string()), 2); drle.push("abc"); - let ids = drle.row_ids_roaring("abc").iter().collect::>(); + let ids = drle + .row_ids_roaring(Some("abc".to_string())) + .iter() + .collect::>(); assert_eq!(ids, vec![0, 1, 2, 5]); - let ids = drle.row_ids_roaring("dre").iter().collect::>(); + let ids = drle + .row_ids_roaring(Some("dre".to_string())) + .iter() + .collect::>(); assert_eq!(ids, vec![3, 4]); - let ids = drle.row_ids_roaring("foo").iter().collect::>(); + let ids = drle + .row_ids_roaring(Some("foo".to_string())) + .iter() + .collect::>(); assert_eq!(ids, vec![]); } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 1f1084c0e8..4b4a58289a 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -87,6 +87,24 @@ impl Segment { } } +/// Meta data for a segment. This data is mainly used to determine if a segment +/// may contain value for answering a query. 
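+// NOTE(editor): overlaps_time_range below treats both the segment's
+// range and the queried window as closed intervals: [a0, a1] overlaps
+// [b0, b1] iff a0 <= b1 && b0 <= a1, so ranges that merely touch at an
+// endpoint still count as overlapping.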
+#[derive(Debug, Default)] +pub struct SegmentMetaData { + size: usize, // TODO + rows: usize, + + column_names: Vec, + time_range: (i64, i64), + // TODO column sort order +} + +impl SegmentMetaData { + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { + self.time_range.0 <= to && from <= self.time_range.1 + } +} + pub struct Segments<'a> { segments: Vec<&'a Segment>, } @@ -100,6 +118,7 @@ impl<'a> Segments<'a> { let mut segments: Vec<&Segment> = vec![]; for segment in &self.segments { if segment.meta.overlaps_time_range(min, max) { + println!("Segement {:?} overlaps", segment.meta); segments.push(segment); } } @@ -131,7 +150,7 @@ impl<'a> Segments<'a> { let mut min_min: Option = None; for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { - let min = Some(segment.columns[i].min()); + let min = segment.columns[i].min(); if min_min.is_none() { min_min = min } else if min_min > min { @@ -152,7 +171,7 @@ impl<'a> Segments<'a> { let mut max_max: Option = None; for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { - let max = Some(segment.columns[i].max()); + let max = segment.columns[i].max(); if max_max.is_none() { max_max = max } else if max_max < max { @@ -231,28 +250,5 @@ impl<'a> Segments<'a> { } } -/// Meta data for a segment. This data is mainly used to determine if a segment -/// may contain value for answering a query. -#[derive(Debug, Default)] -pub struct SegmentMetaData { - size: usize, // TODO - rows: usize, - - column_names: Vec, - time_range: (i64, i64), - // TODO column sort order -} - -impl SegmentMetaData { - pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { - let result = self.time_range.0 <= to && from <= self.time_range.1; - println!( - "segment with ({:?}) overlaps ({:?}, {:?}) -- {:?}", - self.time_range, from, to, result - ); - result - } -} - #[cfg(test)] mod test {} From 270bdefcb10f2185ba7657ba77e6a40aa5378340 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 13:16:13 +0100 Subject: [PATCH 10/73] feat: add ability to scan column and materialise --- delorean_mem_qe/src/bin/main.rs | 58 +++++++++------ delorean_mem_qe/src/column.rs | 124 ++++++++++++++++++++++++++++++-- delorean_mem_qe/src/encoding.rs | 121 +++++++++++++++++++++++++++++-- delorean_mem_qe/src/segment.rs | 48 +++++++++---- 4 files changed, 308 insertions(+), 43 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index ae104d35a8..44776f035d 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -43,11 +43,16 @@ fn main() { let res = segments.last("host").unwrap(); println!("{:?}", res); - let segments = segments - .filter_by_time(1590036110000000, 1590044410000000) - .filter_by_predicate_eq("env", &column::Scalar::String("prod01-eu-central-1")); - let res = segments.first("env"); - println!("{:?}", res); + // let segments = segments + // .filter_by_time(1590036110000000, 1590044410000000) + // .filter_by_predicate_eq("env", &column::Scalar::String("prod01-eu-central-1")); + // let res = segments.first( + // "env", + // &column::Scalar::String("prod01-eu-central-1"), + // 1590036110000000, + // ); + // println!("{:?}", res); + // let segments = segments.filter_by_time(1590036110000000, 1590044410000000); } fn build_store( @@ -62,7 +67,7 @@ fn build_store( } fn convert_record_batch(rb: RecordBatch) -> Result { - let mut segment = Segment::default(); + let mut segment = 
Segment::new(rb.num_rows()); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); for (i, column) in rb.columns().iter().enumerate() { @@ -95,28 +100,37 @@ fn convert_record_batch(rb: RecordBatch) -> Result { .unwrap(); let mut c = column::String::default(); - let mut prev: Option<&str> = None; - if !column.is_null(0) { - prev = Some(arr.value(0)); - } + let mut prev = if !column.is_null(0) { + Some(arr.value(0)) + } else { + None + }; let mut count = 1_u64; for j in 1..arr.len() { - let mut next = Some(arr.value(j)); - if column.is_null(j) { - next = None; - } + let next = if column.is_null(j) { + None + } else { + Some(arr.value(j)) + }; if prev == next { count += 1; - } else { - match prev { - Some(x) => c.add_additional(Some(x.to_string()), count), - None => c.add_additional(None, count), - } - prev = next; - count = 1; + continue; } + + match prev { + Some(x) => c.add_additional(Some(x.to_string()), count), + None => c.add_additional(None, count), + } + prev = next; + count = 1; + } + + // Add final batch to column if any + match prev { + Some(x) => c.add_additional(Some(x.to_string()), count), + None => c.add_additional(None, count), } segment.add_column(rb.schema().field(i).name(), Column::String(c)); @@ -183,7 +197,7 @@ fn time_column_first(store: &Store) { for _ in 1..repeat { let now = std::time::Instant::now(); let segments = store.segments(); - let res = segments.first("host").unwrap(); + let res = segments.first("host", 0).unwrap(); total_time += now.elapsed(); total_max += res.0; } diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 4597cf80c5..727e8f2c2e 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -9,6 +9,13 @@ pub enum Scalar<'a> { Integer(i64), } +#[derive(Debug)] +pub enum Vector<'a> { + String(Vec<&'a Option>), + Float(&'a [f64]), + Integer(&'a [i64]), +} + #[derive(Debug)] pub enum Column { String(String), @@ -62,6 +69,64 @@ impl Column { } } + /// materialise all rows including and after row_id + pub fn scan_from(&self, row_id: usize) -> Option { + if row_id >= self.num_rows() { + println!( + "asking for {:?} but only got {:?} rows", + row_id, + self.num_rows() + ); + return None; + } + + println!( + "asking for {:?} with a column having {:?} rows", + row_id, + self.num_rows() + ); + match self { + Column::String(c) => Some(Vector::String(c.scan_from(row_id))), + Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), + Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + } + } + + /// Given the provided row_id scans the column until a non-null value found + /// or the column is exhausted. 
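+ ///
+ /// E.g. given a column holding [None, None, Some("a"), None], scanning
+ /// from row 0, 1 or 2 yields Some("a"), while scanning from row 3
+ /// yields None.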
+ pub fn scan_from_until_some(&self, row_id: usize) -> Option { + match self { + Column::String(c) => { + if row_id >= self.num_rows() { + return None; + } + + match c.scan_from_until_some(row_id) { + Some(v) => Some(Scalar::String(v)), + None => None, + } + } + Column::Float(c) => { + if row_id >= self.num_rows() { + return None; + } + match c.scan_from_until_some(row_id) { + Some(v) => Some(Scalar::Float(v)), + None => None, + } + } + Column::Integer(c) => { + if row_id >= self.num_rows() { + return None; + } + match c.scan_from_until_some(row_id) { + Some(v) => Some(Scalar::Integer(v)), + None => None, + } + } + } + } + pub fn maybe_contains(&self, value: &Scalar) -> bool { match self { Column::String(c) => { @@ -144,7 +209,7 @@ impl String { } pub fn add_additional(&mut self, s: Option, additional: u64) { - self.meta.add(s.clone()); + self.meta.add_repeated(s.clone(), additional as usize); self.data.push_additional(s, additional); } @@ -159,6 +224,15 @@ impl String { pub fn value(&self, row_id: usize) -> Option<&std::string::String> { self.data.value(row_id) } + + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { + self.data.scan_from(row_id) + } + + pub fn scan_from_until_some(&self, row_id: usize) -> Option<&std::string::String> { + unreachable!("don't need this"); + // self.data.scan_from_until_some(row_id) + } } #[derive(Debug, Default)] @@ -181,6 +255,14 @@ impl Float { pub fn value(&self, row_id: usize) -> f64 { self.data.value(row_id) } + + pub fn scan_from(&self, row_id: usize) -> &[f64] { + self.data.scan_from(row_id) + } + + pub fn scan_from_until_some(&self, row_id: usize) -> Option { + self.data.scan_from_until_some(row_id) + } } impl From<&[f64]> for Float { @@ -223,12 +305,28 @@ impl Integer { self.data.value(row_id) } + pub fn scan_from(&self, row_id: usize) -> &[i64] { + self.data.scan_from(row_id) + } + + pub fn scan_from_until_some(&self, row_id: usize) -> Option { + self.data.scan_from_until_some(row_id) + } + /// Find the first logical row that contains this value. 
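// NOTE(editor): e.g. for a time column holding [10, 20, 30],
// row_id_eq_value(20) is Some(1), row_id_ge_value(25) (added below) is
// Some(2), and row_id_ge_value(40) is None because the metadata max of
// 30 proves no row can qualify before any scan happens.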
- pub fn row_id_for_value(&self, v: i64) -> Option { + pub fn row_id_eq_value(&self, v: i64) -> Option { if !self.meta.maybe_contains_value(v) { return None; } - self.data.row_id_for_value(v) + self.data.row_id_eq_value(v) + } + + /// Find the first logical row that contains a value >= v + pub fn row_id_ge_value(&self, v: i64) -> Option { + if self.meta.max() < v { + return None; + } + self.data.row_id_ge_value(v) } } @@ -263,11 +361,23 @@ pub mod metadata { pub fn add(&mut self, s: Option) { self.num_rows += 1; - if self.range.0 > s { + if s < self.range.0 { self.range.0 = s.clone(); } - if self.range.1 < s { + if s > self.range.1 { + self.range.1 = s; + } + } + + pub fn add_repeated(&mut self, s: Option, additional: usize) { + self.num_rows += additional; + + if s < self.range.0 { + self.range.0 = s.clone(); + } + + if s > self.range.1 { self.range.1 = s; } } @@ -349,6 +459,10 @@ pub mod metadata { self.range.0 <= v && v <= self.range.1 } + pub fn max(&self) -> i64 { + self.range.1 + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 283398223f..596001d379 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -23,20 +23,37 @@ pub struct PlainFixed { impl PlainFixed where - T: PartialEq + Copy, + T: PartialEq + PartialOrd + Copy + std::fmt::Debug, { pub fn size(&self) -> usize { self.values.len() * std::mem::size_of::() } - pub fn row_id_for_value(&self, v: T) -> Option { + pub fn row_id_eq_value(&self, v: T) -> Option { self.values.iter().position(|x| *x == v) } + pub fn row_id_ge_value(&self, v: T) -> Option { + self.values.iter().position(|x| *x >= v) + } + // get value at row_id. Panics if out of bounds. pub fn value(&self, row_id: usize) -> T { self.values[row_id] } + + // TODO(edd): fix this when added NULL support + pub fn scan_from_until_some(&self, row_id: usize) -> Option { + unreachable!("to remove"); + // for v in self.values.iter().skip(row_id) { + // return Some(*v); + // } + // None + } + + pub fn scan_from(&self, row_id: usize) -> &[T] { + &self.values[row_id..] + } } impl From<&[i64]> for PlainFixed { @@ -213,6 +230,60 @@ impl DictionaryRLE { None } + // materialise a slice of rows starting from index. + pub fn scan_from(&self, index: usize) -> Vec<&Option> { + let mut result = vec![]; + if index >= self.total as usize { + return result; + } + + let start_row_id = index as u64; + + let mut curr_row_id = 0_u64; // this tracks the logical row id. + for (idx, rl) in &self.run_lengths { + // Fast path - at this point we are just materialising the RLE + // contents. + if curr_row_id > start_row_id { + let row_entry = self.index_entry.get(idx).unwrap(); + result.extend(vec![row_entry; *rl as usize]); + curr_row_id += rl; + continue; + } + + // Once we have reached the desired starting row_id we can emit values. + if (curr_row_id + *rl) >= start_row_id { + // Since it's unlikely that the desired row falls on a new RLE + // boundary we need to account for a partial RLE entry and only + // populate some of the remaining entry + let remainder = (curr_row_id + rl) - start_row_id; + let row_entry = self.index_entry.get(idx).unwrap(); + result.extend(vec![row_entry; remainder as usize]); + } + + // move onto next RLE entry. + curr_row_id += *rl; + } + result + } + + // // get the logical value at the provided index, or scan to the next value + // // that is non-null. 
+ // pub fn scan_from_until_some(&self, index: usize) -> Option<&String> { + // if index < self.total as usize { + // let mut total = 0; + // for (idx, rl) in &self.run_lengths { + // if total + rl > index as u64 { + // // If there is a value then return otherwise continue. + // if let Some(v) = self.index_entry.get(idx) { + // return v.as_ref(); + // } + // } + // total += rl; + // } + // } + // None + // } + // values materialises a vector of references to all logical values in the // encoding. pub fn values(&mut self) -> Vec> { @@ -315,6 +386,46 @@ mod test { assert_eq!(drle.value(8).unwrap(), "zoo"); } + #[test] + fn dict_rle_scan_from() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(east.clone(), 2); + drle.push_additional(north.clone(), 4); + + // all entries + let results = drle.scan_from(0); + let mut exp = vec![&west; 3]; + exp.extend(vec![&east; 2].iter()); + exp.extend(vec![&north; 4].iter()); + assert_eq!(results, exp); + + // partial results from an RLE entry + let results = drle.scan_from(2); + let mut exp = vec![&west; 1]; // notice partial results + exp.extend(vec![&east; 2].iter()); + exp.extend(vec![&north; 4].iter()); + assert_eq!(results, exp); + + // right on a boundary + let results = drle.scan_from(3); + let mut exp = vec![&east; 2]; + exp.extend(vec![&north; 4].iter()); + assert_eq!(results, exp); + + // partial final result + let results = drle.scan_from(6); + assert_eq!(results, vec![&north; 3]); + + // out of bounds + let results = drle.scan_from(9); + let exp: Vec<&Option> = vec![]; + assert_eq!(results, exp); + } + #[test] fn row_ids() { let mut drle = super::DictionaryRLE::new(); @@ -335,7 +446,8 @@ mod test { let ids = drle .row_ids(Some("foo".to_string())) .collect::>(); - assert_eq!(ids, vec![]); + let empty: Vec = vec![]; + assert_eq!(ids, empty); } #[test] @@ -361,6 +473,7 @@ mod test { .row_ids_roaring(Some("foo".to_string())) .iter() .collect::>(); - assert_eq!(ids, vec![]); + let empty: Vec = vec![]; + assert_eq!(ids, empty); } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 4b4a58289a..94953000a4 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use super::column; use super::column::Column; -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Segment { meta: SegmentMetaData, @@ -14,9 +14,13 @@ pub struct Segment { impl Segment { pub fn new(rows: usize) -> Self { - let mut segment = Self::default(); - segment.meta.rows = rows; - segment + let mut meta = SegmentMetaData::default(); + meta.rows = rows; + Self { + meta, + columns: vec![], + time_column_idx: 0, + } } pub fn num_rows(&self) -> usize { @@ -40,6 +44,15 @@ impl Segment { } pub fn add_column(&mut self, name: &str, c: column::Column) { + assert_eq!( + self.meta.rows, + c.num_rows(), + "Column {:?} has {:?} rows but wanted {:?}", + name, + c.num_rows(), + self.meta.rows + ); + // TODO(edd) yuk if name == "time" { if let column::Column::Integer(ts) = &c { @@ -49,7 +62,6 @@ impl Segment { } self.time_column_idx = self.columns.len(); } - self.meta.rows = c.num_rows(); // validate column doesn't already exist in segment assert!(!self.meta.column_names.contains(&name.to_owned())); @@ -85,6 +97,13 @@ impl Segment { } column_sizes } + + pub fn scan_from(&self, column_name: &str, row_id: usize) -> Option { 
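+ // Materialise all remaining rows of the named column starting at
+ // row_id; returns None if the column is unknown or row_id is beyond
+ // the segment.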
+ if let Some(i) = self.column_names().iter().position(|c| c == column_name) { + return self.columns[i].scan_from(row_id); + } + None + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -114,11 +133,14 @@ impl<'a> Segments<'a> { Self { segments } } + pub fn segments(&self) -> &Vec<&'a Segment> { + &self.segments + } + pub fn filter_by_time(&self, min: i64, max: i64) -> Segments<'a> { let mut segments: Vec<&Segment> = vec![]; for segment in &self.segments { if segment.meta.overlaps_time_range(min, max) { - println!("Segement {:?} overlaps", segment.meta); segments.push(segment); } } @@ -186,7 +208,7 @@ impl<'a> Segments<'a> { /// Returns the first value for a column in a set of segments. /// /// TODO(edd): could return NULL value.. - pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + pub fn first(&self, column_name: &str, min_ts: i64) -> Option<(i64, Option)> { if self.segments.is_empty() { return None; } @@ -195,12 +217,14 @@ impl<'a> Segments<'a> { for segment in &self.segments { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { - // TODO(edd): clean up unwr - let min_ts = ts_col.column_range().0; - let min_ts_id = ts_col.row_id_for_value(min_ts).unwrap(); + let first_ts_id = ts_col.row_id_ge_value(min_ts)?; + println!("first ts is {:?}", first_ts_id); // now we have row id we can get value for that row id - let value = segment.column(column_name).unwrap().value(min_ts_id); + let value = segment + .column(column_name) + .unwrap() + .scan_from_until_some(first_ts_id); match &first_first { Some(prev) => { @@ -230,7 +254,7 @@ impl<'a> Segments<'a> { if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwr let max_ts = ts_col.column_range().1; - let max_ts_id = ts_col.row_id_for_value(max_ts).unwrap(); + let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); // now we have row id we can get value for that row id let value = segment.column(column_name).unwrap().value(max_ts_id); From 150a5a9c81ee7d9377e1b8675aca3568c1ff8f04 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 13:51:17 +0100 Subject: [PATCH 11/73] feat: get row by id --- delorean_mem_qe/src/bin/main.rs | 26 ++++++- delorean_mem_qe/src/column.rs | 2 + delorean_mem_qe/src/segment.rs | 131 ++++++++++++++++++-------------- 3 files changed, 102 insertions(+), 57 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 44776f035d..92c0144c60 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -53,6 +53,10 @@ fn main() { // ); // println!("{:?}", res); // let segments = segments.filter_by_time(1590036110000000, 1590044410000000); + // println!("{:?}", segments.last("host")); + // println!("{:?}", segments.segments().last().unwrap().row(14899)); + + time_row_by_id(&store, 14899); } fn build_store( @@ -197,7 +201,7 @@ fn time_column_first(store: &Store) { for _ in 1..repeat { let now = std::time::Instant::now(); let segments = store.segments(); - let res = segments.first("host", 0).unwrap(); + let res = segments.first("host").unwrap(); total_time += now.elapsed(); total_max += res.0; } @@ -209,3 +213,23 @@ fn time_column_first(store: &Store) { total_max ); } + +fn time_row_by_id(store: &Store, row_id: usize) { + let repeat = 100000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + for _ in 1..repeat { + 
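// NOTE(editor): `1..repeat` performs repeat - 1 iterations while the
// average printed below divides by `repeat`, slightly understating the
// per-call time; the next patch switches this loop to `0..repeat`.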
let now = std::time::Instant::now(); + let segments = store.segments(); + let res = segments.segments().last().unwrap().row(row_id).unwrap(); + total_time += now.elapsed(); + total_max += res.len(); + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 727e8f2c2e..524d2b17b7 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -153,6 +153,7 @@ impl Column { } } + /// Returns the minimum value contained within this column. // FIXME(edd): Support NULL integers and floats pub fn min(&self) -> Option { match self { @@ -167,6 +168,7 @@ impl Column { } } + /// Returns the maximum value contained within this column. // FIXME(edd): Support NULL integers and floats pub fn max(&self) -> Option { match self { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 94953000a4..0b07ff2dbf 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -98,12 +98,25 @@ impl Segment { column_sizes } - pub fn scan_from(&self, column_name: &str, row_id: usize) -> Option { + pub fn scan_column_from(&self, column_name: &str, row_id: usize) -> Option { if let Some(i) = self.column_names().iter().position(|c| c == column_name) { return self.columns[i].scan_from(row_id); } None } + + pub fn row(&self, row_id: usize) -> Option>> { + if row_id >= self.num_rows() { + return None; + } + + Some( + self.columns + .iter() + .map(|c| c.value(row_id)) + .collect::>>(), + ) + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -137,6 +150,14 @@ impl<'a> Segments<'a> { &self.segments } + pub fn is_empty(&self) -> bool { + self.segments.is_empty() + } + + pub fn len(&self) -> usize { + self.segments.len() + } + pub fn filter_by_time(&self, min: i64, max: i64) -> Segments<'a> { let mut segments: Vec<&Segment> = vec![]; for segment in &self.segments { @@ -207,70 +228,68 @@ impl<'a> Segments<'a> { /// Returns the first value for a column in a set of segments. /// + /// The first value is based on the time column, therefore the returned value + /// may not be at the end of the column. + /// + /// If the time column has multiple max time values then the result is abitrary. + /// /// TODO(edd): could return NULL value.. - pub fn first(&self, column_name: &str, min_ts: i64) -> Option<(i64, Option)> { - if self.segments.is_empty() { - return None; + pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + // First let's find the segment with the latest time range. + // notice we order a < b on max time range. 
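// NOTE(editor): "latest"/"max" in the two comments above looks like a
// copy-paste slip from last(): first() wants the segment with the
// earliest time range, which is why min_by compares time_range.0 (the
// minimum timestamp) below.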
+ let segment = self + .segments + .iter() + .min_by(|a, b| a.meta.time_range.0.cmp(&b.meta.time_range.0))?; + + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwrap + let min_ts = ts_col.column_range().0; + assert_eq!(min_ts, segment.meta.time_range.0); + + let min_ts_id = ts_col.row_id_eq_value(min_ts).unwrap(); + + println!("first ts is {:?} at row {:?}", min_ts, min_ts_id); + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(min_ts_id); + Some((min_ts, value)) + } else { + panic!("time column wrong type!"); } - - let mut first_first: Option<(i64, Option)> = None; - for segment in &self.segments { - // first find the logical row id of the minimum timestamp value - if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { - let first_ts_id = ts_col.row_id_ge_value(min_ts)?; - - println!("first ts is {:?}", first_ts_id); - // now we have row id we can get value for that row id - let value = segment - .column(column_name) - .unwrap() - .scan_from_until_some(first_ts_id); - - match &first_first { - Some(prev) => { - if prev.0 > min_ts { - first_first = Some((min_ts, value)); - } - } - None => first_first = Some((min_ts, value)), - } - } - } - - first_first } /// Returns the last value for a column in a set of segments. /// + /// The last value is based on the time column, therefore the returned value + /// may not be at the end of the column. + /// + /// If the time column has multiple max time values then the result is abitrary. + /// /// TODO(edd): could return NULL value.. pub fn last(&self, column_name: &str) -> Option<(i64, Option)> { - if self.segments.is_empty() { - return None; + // First let's find the segment with the latest time range. + // notice we order a > b on max time range. 
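// NOTE(editor): a sketch with two hypothetical segments whose time
// ranges are (100, 200) and (150, 300): first() above consults
// (100, 200), the smallest minimum, while last() below consults
// (150, 300), the largest maximum; ties between segments are broken
// arbitrarily, as the doc comments note.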
+ let segment = self + .segments + .iter() + .max_by(|a, b| a.meta.time_range.1.cmp(&b.meta.time_range.1))?; + + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwrap + let max_ts = ts_col.column_range().1; + assert_eq!(max_ts, segment.meta.time_range.1); + + let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); + + println!("last ts is {:?} at row {:?}", max_ts, max_ts_id); + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(max_ts_id); + Some((max_ts, value)) + } else { + panic!("time column wrong type!"); } - - let mut last_last: Option<(i64, Option)> = None; - for segment in &self.segments { - // first find the logical row id of the minimum timestamp value - if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { - // TODO(edd): clean up unwr - let max_ts = ts_col.column_range().1; - let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); - - // now we have row id we can get value for that row id - let value = segment.column(column_name).unwrap().value(max_ts_id); - - match &last_last { - Some(prev) => { - if prev.0 < max_ts { - last_last = Some((max_ts, value)); - } - } - None => last_last = Some((max_ts, value)), - } - } - } - - last_last } } From ee8ac1b909bb4cdd2a6280de6ff61e49c0e6fcb7 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 14:43:02 +0100 Subject: [PATCH 12/73] refactor: return row id --- delorean_mem_qe/src/bin/main.rs | 10 ++++++---- delorean_mem_qe/src/segment.rs | 10 ++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 92c0144c60..f8634fc053 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -56,7 +56,7 @@ fn main() { // println!("{:?}", segments.last("host")); // println!("{:?}", segments.segments().last().unwrap().row(14899)); - time_row_by_id(&store, 14899); + time_row_by_last_ts(&store); } fn build_store( @@ -214,13 +214,15 @@ fn time_column_first(store: &Store) { ); } -fn time_row_by_id(store: &Store, row_id: usize) { +fn time_row_by_last_ts(store: &Store) { let repeat = 100000; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; - for _ in 1..repeat { + let segments = store.segments(); + for _ in 0..repeat { let now = std::time::Instant::now(); - let segments = store.segments(); + + let (_, _, row_id) = segments.last("time").unwrap(); let res = segments.segments().last().unwrap().row(row_id).unwrap(); total_time += now.elapsed(); total_max += res.len(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 0b07ff2dbf..716392e5d1 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -234,7 +234,7 @@ impl<'a> Segments<'a> { /// If the time column has multiple max time values then the result is abitrary. /// /// TODO(edd): could return NULL value.. - pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + pub fn first(&self, column_name: &str) -> Option<(i64, Option, usize)> { // First let's find the segment with the latest time range. // notice we order a < b on max time range. 
let segment = self @@ -250,10 +250,9 @@ impl<'a> Segments<'a> { let min_ts_id = ts_col.row_id_eq_value(min_ts).unwrap(); - println!("first ts is {:?} at row {:?}", min_ts, min_ts_id); // now we have row id we can get value for that row id let value = segment.column(column_name).unwrap().value(min_ts_id); - Some((min_ts, value)) + Some((min_ts, value, min_ts_id)) } else { panic!("time column wrong type!"); } @@ -267,7 +266,7 @@ impl<'a> Segments<'a> { /// If the time column has multiple max time values then the result is abitrary. /// /// TODO(edd): could return NULL value.. - pub fn last(&self, column_name: &str) -> Option<(i64, Option)> { + pub fn last(&self, column_name: &str) -> Option<(i64, Option, usize)> { // First let's find the segment with the latest time range. // notice we order a > b on max time range. let segment = self @@ -283,10 +282,9 @@ impl<'a> Segments<'a> { let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); - println!("last ts is {:?} at row {:?}", max_ts, max_ts_id); // now we have row id we can get value for that row id let value = segment.column(column_name).unwrap().value(max_ts_id); - Some((max_ts, value)) + Some((max_ts, value, max_ts_id)) } else { panic!("time column wrong type!"); } From e3e1611e821c78814d4124035b75c3a8ed14959b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 17:05:39 +0100 Subject: [PATCH 13/73] feat: predicate pushdown --- delorean_mem_qe/src/bin/main.rs | 75 +++++++++++++++- delorean_mem_qe/src/column.rs | 149 ++++++++++++++++++++++++++++---- delorean_mem_qe/src/encoding.rs | 77 ++++++++++++++++- delorean_mem_qe/src/segment.rs | 93 ++++++++++++++++---- 4 files changed, 358 insertions(+), 36 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index f8634fc053..91f36dfcd5 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -56,7 +56,35 @@ fn main() { // println!("{:?}", segments.last("host")); // println!("{:?}", segments.segments().last().unwrap().row(14899)); - time_row_by_last_ts(&store); + // time_row_by_last_ts(&store); + + let rows = segments + .segments() + .last() + .unwrap() + .filter_by_predicate_eq( + Some((1590040770000000, 1590040790000000)), + vec![ + ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + ("method", Some(&column::Scalar::String("GET"))), + ( + "host", + Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + ), + ], + ) + .unwrap(); + + for row_id in rows.iter() { + println!( + "{:?} - {:?}", + row_id, + segments.segments().last().unwrap().row(row_id as usize) + ); + } + println!("{:?}", rows.cardinality()); + + time_row_by_preds(&store); } fn build_store( @@ -235,3 +263,48 @@ fn time_row_by_last_ts(store: &Store) { total_max ); } + +fn time_row_by_preds(store: &Store) { + let repeat = 100000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let rows = segments + .segments() + .last() + .unwrap() + .filter_by_predicate_eq( + Some((1590040770000000, 1590040790000000)), + vec![ + ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + ("method", Some(&column::Scalar::String("GET"))), + ( + "host", + Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + ), + ], + ) + .unwrap(); + + // for row_id in rows.iter() { + // println!( + // "{:?} - {:?}", + // row_id, + // segments.segments().last().unwrap().row(row_id as usize) + // ); + // } + + 
total_time += now.elapsed(); + total_max += rows.cardinality(); + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 524d2b17b7..bf1ca53c8f 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -127,27 +127,89 @@ impl Column { } } - pub fn maybe_contains(&self, value: &Scalar) -> bool { + pub fn maybe_contains(&self, value: Option<&Scalar>) -> bool { match self { - Column::String(c) => { - if let Scalar::String(v) = value { - c.meta.maybe_contains_value(&v.to_string()) - } else { - panic!("invalid value"); + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + c.meta.maybe_contains_value(Some(v.to_string())) + } else { + panic!("invalid value"); + } } - } + None => c.meta.maybe_contains_value(None), + }, Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Some(Scalar::Float(v)) = value { c.meta.maybe_contains_value(v.to_owned()) } else { - panic!("invalid value"); + panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Some(Scalar::Integer(v)) = value { c.meta.maybe_contains_value(v.to_owned()) } else { - panic!("invalid value"); + panic!("invalid value or unsupported null"); + } + } + } + } + + /// returns true if the column cannot contain + pub fn max_less_than(&self, value: Option<&Scalar>) -> bool { + match self { + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + c.meta.range().1 < Some(&v.to_string()) + } else { + panic!("invalid value"); + } + } + None => c.meta.range().1 < None, + }, + Column::Float(c) => { + if let Some(Scalar::Float(v)) = value { + c.meta.range().1 < *v + } else { + panic!("invalid value or unsupported null"); + } + } + Column::Integer(c) => { + if let Some(Scalar::Integer(v)) = value { + c.meta.range().1 < *v + } else { + panic!("invalid value or unsupported null"); + } + } + } + } + + pub fn min_greater_than(&self, value: Option<&Scalar>) -> bool { + match self { + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + c.meta.range().0 > Some(&v.to_string()) + } else { + panic!("invalid value"); + } + } + None => c.meta.range().0 > None, + }, + Column::Float(c) => { + if let Some(Scalar::Float(v)) = value { + c.meta.range().0 > *v + } else { + panic!("invalid value or unsupported null"); + } + } + Column::Integer(c) => { + if let Some(Scalar::Integer(v)) = value { + c.meta.range().0 > *v + } else { + panic!("invalid value or unsupported null"); } } } @@ -182,6 +244,62 @@ impl Column { Column::Integer(c) => Some(Scalar::Integer(c.meta.range().1)), } } + + // TODO(edd) shouldn't let roaring stuff leak out... + pub fn row_ids_eq(&self, value: Option<&Scalar>) -> Option { + if !self.maybe_contains(value) { + return None; + } + self.row_ids(value, std::cmp::Ordering::Equal) + } + + pub fn row_ids_gt(&self, value: Option<&Scalar>) -> Option { + if self.max_less_than(value) { + return None; + } + self.row_ids(value, std::cmp::Ordering::Greater) + } + + pub fn row_ids_lt(&self, value: Option<&Scalar>) -> Option { + if self.min_greater_than(value) { + return None; + } + self.row_ids(value, std::cmp::Ordering::Less) + } + + // TODO(edd) shouldn't let roaring stuff leak out... 
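+ // NOTE(editor): the three public variants above pair a metadata
+ // pre-check with the scan below: row_ids_eq consults maybe_contains,
+ // row_ids_gt consults max_less_than and row_ids_lt consults
+ // min_greater_than, each returning None without touching the data when
+ // the check proves no row can match. Also note that for String columns
+ // the private row_ids below ignores `order` and always performs an
+ // equality match against the dictionary.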
+ fn row_ids( + &self, + value: Option<&Scalar>, + order: std::cmp::Ordering, + ) -> Option { + match self { + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + Some(c.data.row_ids_roaring(Some(v.to_string()))) + } else { + panic!("invalid value"); + } + } + None => Some(c.data.row_ids_roaring(None)), + }, + Column::Float(c) => { + if let Some(Scalar::Float(v)) = value { + Some(c.data.row_ids_roaring(v, order)) + } else { + panic!("invalid value or unsupported null"); + } + } + Column::Integer(c) => { + if let Some(Scalar::Integer(v)) = value { + Some(c.data.row_ids_roaring(v, order)) + } else { + panic!("invalid value or unsupported null"); + } + } + } + } } impl From<&[f64]> for Column { @@ -388,13 +506,8 @@ pub mod metadata { self.num_rows } - pub fn maybe_contains_value(&self, v: &str) -> bool { - let res = self.range.0 <= Some(v.to_string()) && Some(v.to_string()) <= self.range.1; - println!( - "column with ({:?}) maybe contain {:?} -- {:?}", - self.range, v, res - ); - res + pub fn maybe_contains_value(&self, v: Option) -> bool { + self.range.0 <= v && v <= self.range.1 } pub fn range(&self) -> (Option<&String>, Option<&String>) { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 596001d379..cb33423e2b 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -54,6 +54,40 @@ where pub fn scan_from(&self, row_id: usize) -> &[T] { &self.values[row_id..] } + + /// returns a set of row ids that match an ordering on a desired value + pub fn row_ids_roaring(&self, wanted: &T, order: std::cmp::Ordering) -> croaring::Bitmap { + let mut bm = croaring::Bitmap::create(); + + let mut found = false; //self.values[0]; + let mut count = 0; + for (i, next) in self.values.iter().enumerate() { + if next.partial_cmp(wanted) != Some(order) && found { + let (min, max) = (i as u64 - count as u64, i as u64); + bm.add_range(min..max); + found = false; + count = 0; + continue; + } else if next.partial_cmp(wanted) != Some(order) { + continue; + } + + if !found { + found = true; + } + count += 1; + } + + // add any remaining range. 
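+ // NOTE(editor): e.g. scanning [1, 1, 4, 1] for Equal(1): the run of
+ // ones ending at index 2 emits add_range(0..2) inside the loop, the
+ // loop then exits with found == true and count == 1, and this tail
+ // step emits add_range(3..4), yielding row ids {0, 1, 3}.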
+ if found { + let (min, max) = ( + (self.values.len()) as u64 - count as u64, + (self.values.len()) as u64, + ); + bm.add_range(min..max); + } + bm + } } impl From<&[i64]> for PlainFixed { @@ -337,6 +371,45 @@ impl std::convert::From<&delorean_table::Packer> for #[cfg(test)] mod test { + #[test] + fn plain_row_ids_roaring_eq() { + let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; + let col = super::PlainFixed::from(input.as_slice()); + + let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![5, 6]); + + let bm = col.row_ids_roaring(&1, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![0, 1, 2, 3, 12]); + + let bm = col.row_ids_roaring(&6, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![8]); + + let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![7, 9, 10, 11, 13]); + + let bm = col.row_ids_roaring(&20, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![]); + } + + #[test] + fn plain_row_ids_roaring_gt() { + let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; + let col = super::PlainFixed::from(input.as_slice()); + + let bm = col.row_ids_roaring(&0, std::cmp::Ordering::Greater); + let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Greater); + let exp: Vec = vec![7, 8, 9, 10, 11, 13]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Greater); + let exp: Vec = vec![8]; + assert_eq!(bm.to_vec(), exp); + } + #[test] fn dict_rle() { let mut drle = super::DictionaryRLE::new(); @@ -427,7 +500,7 @@ mod test { } #[test] - fn row_ids() { + fn rle_dict_row_ids() { let mut drle = super::DictionaryRLE::new(); drle.push_additional(Some("abc".to_string()), 3); drle.push_additional(Some("dre".to_string()), 2); @@ -451,7 +524,7 @@ mod test { } #[test] - fn row_ids_roaring() { + fn dict_rle_row_ids_roaring() { let mut drle = super::DictionaryRLE::new(); drle.push_additional(Some("abc".to_string()), 3); drle.push_additional(Some("dre".to_string()), 2); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 716392e5d1..0d815c2225 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -117,6 +117,61 @@ impl Segment { .collect::>>(), ) } + + pub fn filter_by_predicate_eq( + &self, + time_range: Option<(i64, i64)>, + predicates: Vec<(&str, Option<&column::Scalar>)>, + ) -> Option { + let mut bm = None; + if let Some((min, max)) = time_range { + if !self.meta.overlaps_time_range(min, max) { + return None; // segment doesn't have time range + } + + // TODO THIS COULD BE FASTER! 
+ + // find all timestamps row ids > min time + let rows_gt_min = + self.columns[self.time_column_idx].row_ids_gt(Some(&column::Scalar::Integer(min))); + // find all timestamps < max time + let rows_lt_max = + self.columns[self.time_column_idx].row_ids_lt(Some(&column::Scalar::Integer(max))); + + // Finally intersect matching timestamp rows + if rows_gt_min.is_none() && rows_lt_max.is_none() { + return None; + } else if rows_gt_min.is_none() { + bm = rows_lt_max; + } else if rows_lt_max.is_none() { + bm = rows_gt_min; + } else { + let mut rows = rows_gt_min.unwrap(); + rows.and_inplace(&rows_lt_max.unwrap()); + if rows.is_empty() { + return None; + } + bm = Some(rows); + } + } + + // now intersect matching rows for each column + let mut bm = bm.unwrap(); + for (col_pred_name, col_pred_value) in predicates { + if let Some(c) = self.column(col_pred_name) { + match c.row_ids_eq(col_pred_value) { + Some(row_ids) => { + bm.and_inplace(&row_ids); + if bm.is_empty() { + return None; + } + } + None => return None, // if this predicate doesn't match then no rows match + } + } + } + Some(bm) + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -168,21 +223,29 @@ impl<'a> Segments<'a> { Self::new(segments) } - pub fn filter_by_predicate_eq( - &self, - column_name: &str, - value: &column::Scalar, - ) -> Segments<'a> { - let mut segments: Vec<&Segment> = vec![]; - for segment in &self.segments { - if let Some(col) = segment.column(column_name) { - if col.maybe_contains(&value) { - segments.push(segment); - } - } - } - Self::new(segments) - } + // pub fn filter_by_predicate_eq( + // &self, + // time_range: Option<(i64, i64)>, + // predicates: Vec<(&str, &column::Scalar)>, + // ) -> Option { + // let bm = None; + // for segment in self.segments { + // if let Some((min, max)) = time_range { + // if !segment.meta.overlaps_time_range(min, max) { + // continue; // segment doesn't have time range + // } + // } + + // // build set of + + // if let Some(col) = segment.column(column_name) { + // if col.maybe_contains(&value) { + // segments.push(segment); + // } + // } + // } + // Self::new(segments) + // } /// Returns the minimum value for a column in a set of segments. 
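The predicate loop above follows a short-circuiting pattern that recurs throughout this series: intersect each column's matching row ids into an accumulator and bail out the moment it becomes empty. A compact sketch of that accumulator, assuming the croaring crate already used by this patch (Bitmap::and_inplace, Bitmap::is_empty):

use croaring::Bitmap;

// Intersect per-predicate row-id sets into acc, returning None as soon
// as the result is provably empty. Each candidate plays the role of a
// column's row_ids_eq result; None means "this predicate matches no rows".
fn intersect_all(mut acc: Bitmap, candidates: Vec<Option<Bitmap>>) -> Option<Bitmap> {
    for candidate in candidates {
        match candidate {
            Some(row_ids) => {
                acc.and_inplace(&row_ids);
                if acc.is_empty() {
                    return None; // no row satisfies every predicate
                }
            }
            None => return None, // one empty predicate empties the whole result
        }
    }
    Some(acc)
}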
pub fn column_min(&self, column_name: &str) -> Option { From da9d3cd52803d038d2a413ecebcfcb5779b96c04 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 18:41:56 +0100 Subject: [PATCH 14/73] feat: grouping and aggregate --- delorean_mem_qe/src/bin/main.rs | 24 +++++++++++++- delorean_mem_qe/src/column.rs | 29 +++++++++++++++++ delorean_mem_qe/src/encoding.rs | 58 ++++++++++++++++++++++++++++++--- delorean_mem_qe/src/segment.rs | 35 +++++++++++++++++--- 4 files changed, 137 insertions(+), 9 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 91f36dfcd5..bce6fe2384 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -84,7 +84,29 @@ fn main() { } println!("{:?}", rows.cardinality()); - time_row_by_preds(&store); + // time_row_by_preds(&store); + + let group_ids = segments + .segments() + .last() + .unwrap() + .group_by_column_ids("env") + .unwrap(); + + for (col_values, row_ids) in group_ids { + let (min, max) = segments.segments().last().unwrap().time_range(); + println!( + "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?}", + min, + max, + col_values, + segments + .segments() + .last() + .unwrap() + .sum_column(&"counter", &row_ids) + ); + } } fn build_store( diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index bf1ca53c8f..5163c9a5b2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -245,6 +245,24 @@ impl Column { } } + pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> Option { + match self { + Column::String(_) => unimplemented!("not implemented"), + Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), + Column::Integer(_) => unimplemented!("not implemented"), + } + } + + pub fn group_by_ids( + &self, + ) -> &std::collections::BTreeMap, croaring::Bitmap> { + match self { + Column::String(c) => c.data.group_row_ids(), + Column::Float(_) => unimplemented!("not implemented"), + Column::Integer(_) => unimplemented!("not implemented"), + } + } + // TODO(edd) shouldn't let roaring stuff leak out... pub fn row_ids_eq(&self, value: Option<&Scalar>) -> Option { if !self.maybe_contains(value) { @@ -353,6 +371,13 @@ impl String { unreachable!("don't need this"); // self.data.scan_from_until_some(row_id) } + + // TODO(edd) shouldn't let roaring stuff leak out... 
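The main.rs loop above is the grouped-aggregate strategy in miniature: each distinct value of the group column owns a bitmap of the rows it appears in, so grouping is a map lookup and aggregation is a filtered sum over those row ids. A self-contained sketch, assuming croaring and a plain f64 value column in place of the patch's Column types:

use croaring::Bitmap;
use std::collections::BTreeMap;

// Sum `values` for each group, restricted to rows that pass `filter`.
fn grouped_sums(
    groups: &BTreeMap<String, Bitmap>, // group value -> rows containing it
    filter: &Bitmap,                   // rows matching the time range/predicates
    values: &[f64],                    // the column being aggregated
) -> BTreeMap<String, f64> {
    let mut out = BTreeMap::new();
    for (group, row_ids) in groups {
        let matching = row_ids.and(filter); // rows in this group AND the filter
        if matching.is_empty() {
            continue; // group has no rows in the filtered set
        }
        let sum: f64 = matching.iter().map(|id| values[id as usize]).sum();
        out.insert(group.clone(), sum);
    }
    out
}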
+ pub fn group_row_ids( + &self, + ) -> &std::collections::BTreeMap, croaring::Bitmap> { + self.data.group_row_ids() + } } #[derive(Debug, Default)] @@ -383,6 +408,10 @@ impl Float { pub fn scan_from_until_some(&self, row_id: usize) -> Option { self.data.scan_from_until_some(row_id) } + + pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> f64 { + self.data.sum_by_ids(row_ids) + } } impl From<&[f64]> for Float { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index cb33423e2b..4b856f4dac 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -23,7 +23,7 @@ pub struct PlainFixed { impl PlainFixed where - T: PartialEq + PartialOrd + Copy + std::fmt::Debug, + T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, { pub fn size(&self) -> usize { self.values.len() * std::mem::size_of::() @@ -88,6 +88,17 @@ where } bm } + + // TODO(edd): make faster + pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> T { + let mut res = T::default(); + row_ids.iter().for_each(|x| res += self.value(x as usize)); + res + } + + pub fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + row_ids.cardinality() + } } impl From<&[i64]> for PlainFixed { @@ -113,6 +124,9 @@ pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. entry_index: BTreeMap, usize>, + // Experiment - store rows that each entry has a value for + entry_row_ids: BTreeMap, croaring::Bitmap>, + // stores the mapping between an index and its entry. index_entry: BTreeMap>, @@ -130,6 +144,7 @@ impl DictionaryRLE { pub fn new() -> Self { Self { entry_index: BTreeMap::new(), + entry_row_ids: BTreeMap::new(), index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), @@ -147,7 +162,6 @@ impl DictionaryRLE { } pub fn push_additional(&mut self, v: Option, additional: u64) { - self.total += additional; let idx = self.entry_index.get(&v); match idx { Some(idx) => { @@ -160,6 +174,10 @@ impl DictionaryRLE { self.run_lengths.push((*idx, additional)); self.run_length_size += std::mem::size_of::<(usize, u64)>(); } + self.entry_row_ids + .get_mut(&v) + .unwrap() + .add_range(self.total..self.total + additional); } } None => { @@ -168,18 +186,24 @@ impl DictionaryRLE { let idx = self.entry_index.len(); self.entry_index.insert(v.clone(), idx); + self.entry_row_ids + .insert(v.clone(), croaring::Bitmap::create()); if let Some(value) = &v { self.map_size += value.len(); } - self.index_entry.insert(idx, v); + self.index_entry.insert(idx, v.clone()); self.map_size += 8 + std::mem::size_of::(); // TODO(edd): clean this option size up self.run_lengths.push((idx, additional)); + self.entry_row_ids + .get_mut(&v) + .unwrap() + .add_range(self.total..self.total + additional); self.run_length_size += std::mem::size_of::<(usize, u64)>(); - return; } } } + self.total += additional; } // row_ids returns an iterator over the set of row ids matching the provided @@ -216,6 +240,11 @@ impl DictionaryRLE { bm } + // get the set of row ids for each distinct value + pub fn group_row_ids(&self) -> &BTreeMap, croaring::Bitmap> { + &self.entry_row_ids + } + // row_ids returns an iterator over the set of row ids matching the provided // value // pub fn row_ids(&'a self, value: &str) -> impl iter::Iterator { @@ -457,6 +486,27 @@ mod test { assert_eq!(drle.value(6).unwrap(), "zoo"); assert_eq!(drle.value(7).unwrap(), "zoo"); assert_eq!(drle.value(8).unwrap(), "zoo"); + + let row_ids = drle + .entry_row_ids + .get(&Some("hello".to_string())) + 
.unwrap() + .to_vec(); + assert_eq!(row_ids, vec![0, 1, 3, 4, 5]); + + let row_ids = drle + .entry_row_ids + .get(&Some("world".to_string())) + .unwrap() + .to_vec(); + assert_eq!(row_ids, vec![2]); + + let row_ids = drle + .entry_row_ids + .get(&Some("zoo".to_string())) + .unwrap() + .to_vec(); + assert_eq!(row_ids, vec![6, 7, 8]); } #[test] diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 0d815c2225..cb6e934535 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -118,6 +118,23 @@ impl Segment { ) } + pub fn group_by_column_ids( + &self, + name: &str, + ) -> Option<&std::collections::BTreeMap, croaring::Bitmap>> { + if let Some(c) = self.column(name) { + return Some(c.group_by_ids()); + } + None + } + + pub fn sum_column(&self, name: &str, row_ids: &croaring::Bitmap) -> Option { + if let Some(c) = self.column(name) { + return c.sum_by_ids(row_ids); + } + None + } + pub fn filter_by_predicate_eq( &self, time_range: Option<(i64, i64)>, @@ -156,21 +173,31 @@ impl Segment { } // now intersect matching rows for each column - let mut bm = bm.unwrap(); + // let mut bm = bm.unwrap(); for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { match c.row_ids_eq(col_pred_value) { Some(row_ids) => { - bm.and_inplace(&row_ids); - if bm.is_empty() { + if row_ids.is_empty() { return None; } + + match &mut bm { + Some(all) => { + all.and_inplace(&row_ids); + if all.is_empty() { + // no rows intersect + return None; + } + } + None => bm = Some(row_ids), + } } None => return None, // if this predicate doesn't match then no rows match } } } - Some(bm) + bm } } From 759254b381d6259364c700bdb3986866ff8b2c0e Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 10 Aug 2020 15:08:45 +0100 Subject: [PATCH 15/73] feat: add support for time >= x and time < y --- delorean_mem_qe/src/bin/main.rs | 99 ++++++++++---------- delorean_mem_qe/src/column.rs | 91 ++++++++++++++++--- delorean_mem_qe/src/encoding.rs | 155 ++++++++++++++++++++++++++++---- delorean_mem_qe/src/segment.rs | 141 +++++++++++++++++++---------- 4 files changed, 362 insertions(+), 124 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index bce6fe2384..d553c72451 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -58,54 +58,61 @@ fn main() { // time_row_by_last_ts(&store); - let rows = segments - .segments() - .last() - .unwrap() - .filter_by_predicate_eq( - Some((1590040770000000, 1590040790000000)), - vec![ - ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - ("method", Some(&column::Scalar::String("GET"))), - ( - "host", - Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - ), - ], - ) - .unwrap(); + // let rows = segments + // .segments() + // .last() + // .unwrap() + // .filter_by_predicate_eq( + // Some((1590040770000000, 1590040790000000)), + // vec![ + // ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + // ("method", Some(&column::Scalar::String("GET"))), + // ( + // "host", + // Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + // ), + // ], + // ) + // .unwrap(); - for row_id in rows.iter() { - println!( - "{:?} - {:?}", - row_id, - segments.segments().last().unwrap().row(row_id as usize) - ); - } - println!("{:?}", rows.cardinality()); + // for row_id in rows.iter() { + // println!( + // "{:?} - {:?}", + // row_id, + // segments.segments().last().unwrap().row(row_id as usize) + // ); + // } + // println!("{:?}", 
rows.cardinality()); // time_row_by_preds(&store); + loop { + let mut total_count = 0.0; + let now = std::time::Instant::now(); + for segment in segments.segments() { + let (min, max) = segment.time_range(); + let time_ids = segment.filter_by_predicates_eq((min, max), vec![]).unwrap(); - let group_ids = segments - .segments() - .last() - .unwrap() - .group_by_column_ids("env") - .unwrap(); - - for (col_values, row_ids) in group_ids { - let (min, max) = segments.segments().last().unwrap().time_range(); - println!( - "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?}", - min, - max, - col_values, - segments - .segments() - .last() - .unwrap() - .sum_column(&"counter", &row_ids) - ); + let group_ids = segment.group_by_column_ids("env").unwrap(); + for (col_values, row_ids) in group_ids { + // filter ids by time + let mut result = row_ids.and(&time_ids); + // let + // println!( + // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", + // min, + // max, + // col_values, + // segment.sum_column(&"counter", &result), + // result.cardinality(), + // ); + if let column::Scalar::Float(x) = + segment.sum_column(&"counter", &mut result).unwrap() + { + total_count += x; + } + } + } + println!("Done ({:?}) in {:?}", total_count, now.elapsed()); } } @@ -298,8 +305,8 @@ fn time_row_by_preds(store: &Store) { .segments() .last() .unwrap() - .filter_by_predicate_eq( - Some((1590040770000000, 1590040790000000)), + .filter_by_predicates_eq( + (1590040770000000, 1590040790000000), vec![ ("env", Some(&column::Scalar::String("prod01-us-west-2"))), ("method", Some(&column::Scalar::String("GET"))), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 5163c9a5b2..5b75e49d7f 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -245,7 +245,7 @@ impl Column { } } - pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> Option { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), @@ -285,6 +285,66 @@ impl Column { self.row_ids(value, std::cmp::Ordering::Less) } + // allows you to do: + // WHERE time >= 0 AND time < 100 + // + // or + // + // WHERE counter >= 102.2 AND counter < 2929.32 + pub fn row_ids_gte_lt(&self, low: &Scalar, high: &Scalar) -> Option { + match self { + Column::String(c) => { + unimplemented!("not implemented yet"); + } + Column::Float(c) => { + let (col_min, col_max) = c.meta.range(); + if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { + if *low >= col_min && *high < col_max { + // In this case the column completely covers the range. + // TODO: PERF - need to _not_ return a bitset rather than + // return a full one. Need to differentiate between "no values" + // and "all values" in the context of an Option. Right now + // None means "no values" + // + let mut bm = croaring::Bitmap::create(); + bm.add_range(0..c.meta.num_rows() as u64); // all rows + return Some(bm); + } + + // The column has some values that are outside of the + // desired range so we need to determine the set of matching + // row ids. + Some(c.data.row_ids_gte_lt_roaring(low, high)) + } else { + panic!("not supposed to be here"); + } + } + Column::Integer(c) => { + let (col_min, col_max) = c.meta.range(); + if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { + if *low >= col_min && *high < col_max { + // In this case the column completely covers the range. 
+ // TODO: PERF - need to _not_ return a bitset rather than + // return a full one. Need to differentiate between "no values" + // and "all values" in the context of an Option. Right now + // None means "no values" + // + let mut bm = croaring::Bitmap::create(); + bm.add_range(0..c.meta.num_rows() as u64); // all rows + return Some(bm); + } + + // The column has some values that are outside of the + // desired range so we need to determine the set of matching + // row ids. + Some(c.data.row_ids_gte_lt_roaring(low, high)) + } else { + panic!("not supposed to be here"); + } + } + } + } + // TODO(edd) shouldn't let roaring stuff leak out... fn row_ids( &self, @@ -292,26 +352,31 @@ impl Column { order: std::cmp::Ordering, ) -> Option { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - Some(c.data.row_ids_roaring(Some(v.to_string()))) - } else { - panic!("invalid value"); - } + Column::String(c) => { + if order != std::cmp::Ordering::Equal { + unimplemented!("> < not supported on strings yet"); } - None => Some(c.data.row_ids_roaring(None)), - }, + match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) + } else { + panic!("invalid value"); + } + } + None => Some(c.data.row_ids_eq_roaring(None)), + } + } Column::Float(c) => { if let Some(Scalar::Float(v)) = value { - Some(c.data.row_ids_roaring(v, order)) + Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { if let Some(Scalar::Integer(v)) = value { - Some(c.data.row_ids_roaring(v, order)) + Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } @@ -409,7 +474,7 @@ impl Float { self.data.scan_from_until_some(row_id) } - pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> f64 { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { self.data.sum_by_ids(row_ids) } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 4b856f4dac..a575e240a0 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -18,6 +18,7 @@ impl PlainFixedOption { // No compression pub struct PlainFixed { values: Vec, + buf: Vec, total_order: bool, // if true the column is totally ordered ascending. } @@ -55,8 +56,14 @@ where &self.values[row_id..] } - /// returns a set of row ids that match an ordering on a desired value - pub fn row_ids_roaring(&self, wanted: &T, order: std::cmp::Ordering) -> croaring::Bitmap { + /// returns a set of row ids that match a single ordering on a desired value + /// + /// This supports `value = x` , `value < x` or `value > x`. + pub fn row_ids_single_cmp_roaring( + &self, + wanted: &T, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); let mut found = false; //self.values[0]; @@ -89,10 +96,100 @@ where bm } + /// returns a set of row ids that match the half open interval `[from, to)`. + /// + /// The main use-case for this is time range filtering. 
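One detail worth flagging in the fast path above: for "the column is completely covered" to hold, the query interval must contain the column's range, i.e. low <= col_min and high > col_max; patch 16 later in this series flips the comparisons accordingly. A sketch of the corrected fast path over an i64 column, assuming croaring's Bitmap::create/add/add_range:

use croaring::Bitmap;

// Row ids whose values fall in the half-open interval [low, high).
fn row_ids_gte_lt(values: &[i64], col_min: i64, col_max: i64, low: i64, high: i64) -> Bitmap {
    let mut bm = Bitmap::create();
    // Fast path: the query interval contains the column's entire range,
    // so every row matches and no scan is needed.
    if low <= col_min && high > col_max {
        bm.add_range(0..values.len() as u64);
        return bm;
    }
    // Slow path: scan and collect matching rows (the patch batches runs
    // of matches into add_range; one id at a time is shown for brevity).
    for (i, v) in values.iter().enumerate() {
        if *v >= low && *v < high {
            bm.add(i as u32);
        }
    }
    bm
}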
+ pub fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap { + let mut bm = croaring::Bitmap::create(); + + let mut found = false; //self.values[0]; + let mut count = 0; + for (i, next) in self.values.iter().enumerate() { + if (next < from || next >= to) && found { + let (min, max) = (i as u64 - count as u64, i as u64); + bm.add_range(min..max); + found = false; + count = 0; + continue; + } else if next < from || next >= to { + continue; + } + + if !found { + found = true; + } + count += 1; + } + + // add any remaining range. + if found { + let (min, max) = ( + (self.values.len()) as u64 - count as u64, + (self.values.len()) as u64, + ); + bm.add_range(min..max); + } + bm + } + // TODO(edd): make faster - pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> T { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { let mut res = T::default(); - row_ids.iter().for_each(|x| res += self.value(x as usize)); + // println!( + // "cardinality is {:?} out of {:?}", + // row_ids.cardinality(), + // self.values.len() + // ); + + // HMMMMM - materialising which has a memory cost. + // let vec = row_ids.to_vec(); + // for v in vec.chunks_exact(4) { + // res += self.value(v[0] as usize); + // res += self.value(v[1] as usize); + // res += self.value(v[2] as usize); + // res += self.value(v[3] as usize); + // } + + // HMMMMM - materialising which has a memory cost. + let vec = row_ids.to_vec(); + for v in vec { + res += self.value(v as usize); + } + + // for v in row_ids.iter() { + // res += self.value(v as usize); + // } + + // let step = 16_u64; + // for i in (0..self.values.len() as u64).step_by(step as usize) { + // if row_ids.contains_range(i..i + step) { + // res += self.value(i as usize + 15); + // res += self.value(i as usize + 14); + // res += self.value(i as usize + 13); + // res += self.value(i as usize + 12); + // res += self.value(i as usize + 11); + // res += self.value(i as usize + 10); + // res += self.value(i as usize + 9); + // res += self.value(i as usize + 8); + // res += self.value(i as usize + 7); + // res += self.value(i as usize + 6); + // res += self.value(i as usize + 5); + // res += self.value(i as usize + 4); + // res += self.value(i as usize + 3); + // res += self.value(i as usize + 2); + // res += self.value(i as usize + 1); + // res += self.value(i as usize); + // continue; + // } + + // for j in i..i + step { + // if row_ids.contains(j as u32) { + // res += self.value(j as usize); + // } + // } + // } + + // row_ids.iter().for_each(|x| res += self.value(x as usize)); res } @@ -105,6 +202,7 @@ impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { Self { values: v.to_vec(), + buf: Vec::with_capacity(v.len()), total_order: false, } } @@ -114,6 +212,7 @@ impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { Self { values: v.to_vec(), + buf: Vec::with_capacity(v.len()), total_order: false, } } @@ -225,7 +324,7 @@ impl DictionaryRLE { // row_ids returns an iterator over the set of row ids matching the provided // value. 
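The commented-out experiments above all probe the same trade-off: summing through row_ids.iter() avoids an allocation but pays per-element decode cost, while materialising with to_vec() allocates once and then reads a flat buffer. Both variants in sketch form, assuming croaring:

use croaring::Bitmap;

// Iterator-based sum: no allocation, one bitmap decode per element.
fn sum_iter(values: &[f64], row_ids: &Bitmap) -> f64 {
    row_ids.iter().map(|id| values[id as usize]).sum()
}

// Materialised sum: one Vec<u32> allocation, then a tight indexed loop.
fn sum_materialised(values: &[f64], row_ids: &Bitmap) -> f64 {
    let ids = row_ids.to_vec();
    let mut sum = 0.0;
    for id in ids {
        sum += values[id as usize];
    }
    sum
}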
- pub fn row_ids_roaring(&self, value: Option) -> croaring::Bitmap { + pub fn row_ids_eq_roaring(&self, value: Option) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); if let Some(idx) = self.entry_index.get(&value) { let mut index: u64 = 0; @@ -405,40 +504,62 @@ mod test { let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; let col = super::PlainFixed::from(input.as_slice()); - let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&4, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![5, 6]); - let bm = col.row_ids_roaring(&1, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&1, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![0, 1, 2, 3, 12]); - let bm = col.row_ids_roaring(&6, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&6, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![8]); - let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&5, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![7, 9, 10, 11, 13]); - let bm = col.row_ids_roaring(&20, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&20, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![]); } #[test] - fn plain_row_ids_roaring_gt() { + fn plain_row_ids_cmp_roaring_gt() { let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; let col = super::PlainFixed::from(input.as_slice()); - let bm = col.row_ids_roaring(&0, std::cmp::Ordering::Greater); + let bm = col.row_ids_single_cmp_roaring(&0, std::cmp::Ordering::Greater); let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; assert_eq!(bm.to_vec(), exp); - let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Greater); + let bm = col.row_ids_single_cmp_roaring(&4, std::cmp::Ordering::Greater); let exp: Vec = vec![7, 8, 9, 10, 11, 13]; assert_eq!(bm.to_vec(), exp); - let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Greater); + let bm = col.row_ids_single_cmp_roaring(&5, std::cmp::Ordering::Greater); let exp: Vec = vec![8]; assert_eq!(bm.to_vec(), exp); } + #[test] + fn plain_row_ids_gte_lt_roaring() { + let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; + let col = super::PlainFixed::from(input.as_slice()); + + let bm = col.row_ids_gte_lt_roaring(&-1, &7); + let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_gte_lt_roaring(&1, &5); + let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 12]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_gte_lt_roaring(&0, &1); + let exp: Vec = vec![]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_gte_lt_roaring(&1, &2); + let exp: Vec = vec![0, 1, 2, 3, 12]; + assert_eq!(bm.to_vec(), exp); + } + #[test] fn dict_rle() { let mut drle = super::DictionaryRLE::new(); @@ -581,19 +702,19 @@ mod test { drle.push("abc"); let ids = drle - .row_ids_roaring(Some("abc".to_string())) + .row_ids_eq_roaring(Some("abc".to_string())) .iter() .collect::>(); assert_eq!(ids, vec![0, 1, 2, 5]); let ids = drle - .row_ids_roaring(Some("dre".to_string())) + .row_ids_eq_roaring(Some("dre".to_string())) .iter() .collect::>(); assert_eq!(ids, vec![3, 4]); let ids = drle - .row_ids_roaring(Some("foo".to_string())) + .row_ids_eq_roaring(Some("foo".to_string())) .iter() .collect::>(); let empty: Vec = vec![]; diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index cb6e934535..aad3153c95 100644 --- a/delorean_mem_qe/src/segment.rs 
+++ b/delorean_mem_qe/src/segment.rs @@ -14,10 +14,8 @@ pub struct Segment { impl Segment { pub fn new(rows: usize) -> Self { - let mut meta = SegmentMetaData::default(); - meta.rows = rows; Self { - meta, + meta: SegmentMetaData::new(rows), columns: vec![], time_column_idx: 0, } @@ -128,52 +126,46 @@ impl Segment { None } - pub fn sum_column(&self, name: &str, row_ids: &croaring::Bitmap) -> Option { + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); } None } - pub fn filter_by_predicate_eq( + pub fn filter_by_predicates_eq( &self, - time_range: Option<(i64, i64)>, + time_range: (i64, i64), predicates: Vec<(&str, Option<&column::Scalar>)>, ) -> Option { - let mut bm = None; - if let Some((min, max)) = time_range { - if !self.meta.overlaps_time_range(min, max) { - return None; // segment doesn't have time range - } - - // TODO THIS COULD BE FASTER! - - // find all timestamps row ids > min time - let rows_gt_min = - self.columns[self.time_column_idx].row_ids_gt(Some(&column::Scalar::Integer(min))); - // find all timestamps < max time - let rows_lt_max = - self.columns[self.time_column_idx].row_ids_lt(Some(&column::Scalar::Integer(max))); - - // Finally intersect matching timestamp rows - if rows_gt_min.is_none() && rows_lt_max.is_none() { - return None; - } else if rows_gt_min.is_none() { - bm = rows_lt_max; - } else if rows_lt_max.is_none() { - bm = rows_gt_min; - } else { - let mut rows = rows_gt_min.unwrap(); - rows.and_inplace(&rows_lt_max.unwrap()); - if rows.is_empty() { - return None; - } - bm = Some(rows); - } + if !self.meta.overlaps_time_range(time_range.0, time_range.1) { + return None; // segment doesn't have time range } + let (seg_min, seg_max) = self.meta.time_range; + if seg_min <= time_range.0 && seg_max >= time_range.1 { + // the segment completely overlaps the time range of query so don't + // need to intersect with time column. + return self.filter_by_predicates_eq_no_time(predicates); + } + + self.filter_by_predicates_eq_time(time_range, predicates) + } + + fn filter_by_predicates_eq_time( + &self, + time_range: (i64, i64), + predicates: Vec<(&str, Option<&column::Scalar>)>, + ) -> Option { + // Get all row_ids matching the time range: + // + // time > time_range.0 AND time < time_range.1 + let mut bm = self.columns[self.time_column_idx].row_ids_gte_lt( + &column::Scalar::Integer(time_range.0), + &column::Scalar::Integer(time_range.1), + )?; + // now intersect matching rows for each column - // let mut bm = bm.unwrap(); for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { match c.row_ids_eq(col_pred_value) { @@ -182,38 +174,91 @@ impl Segment { return None; } - match &mut bm { - Some(all) => { - all.and_inplace(&row_ids); - if all.is_empty() { - // no rows intersect - return None; - } - } - None => bm = Some(row_ids), + bm.and_inplace(&row_ids); + if bm.is_empty() { + return None; } } None => return None, // if this predicate doesn't match then no rows match } } } - bm + Some(bm) + } + + // in this case the complete time range of segment covered so no need to intersect + // on time. + // + // We return an &Option here because we don't want to move the read-only + // meta row_ids bitmap. 
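The refactor above splits time filtering into three cases decided purely from segment metadata: no overlap (prune the segment), full coverage (skip the time column entirely), and partial overlap (intersect predicates with time-column row ids). A sketch of that dispatch, with hypothetical names standing in for the patch's Segment internals:

struct SegmentMeta {
    time_range: (i64, i64), // (min, max) timestamp held by the segment
}

enum TimeFilter {
    Pruned,       // segment and query ranges don't overlap at all
    WholeSegment, // query covers the segment; the time column is never read
    Partial,      // must intersect predicates with time-column row ids
}

fn classify(meta: &SegmentMeta, query: (i64, i64)) -> TimeFilter {
    let (seg_min, seg_max) = meta.time_range;
    if seg_min > query.1 || seg_max < query.0 {
        return TimeFilter::Pruned;
    }
    if query.0 <= seg_min && query.1 > seg_max {
        return TimeFilter::WholeSegment;
    }
    TimeFilter::Partial
}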
+ fn filter_by_predicates_eq_no_time( + &self, + predicates: Vec<(&str, Option<&column::Scalar>)>, + ) -> Option { + let mut bm: Option = None; + // now intersect matching rows for each column + for (col_pred_name, col_pred_value) in predicates { + if let Some(c) = self.column(col_pred_name) { + match c.row_ids_eq(col_pred_value) { + Some(row_ids) => { + if row_ids.is_empty() { + return None; + } + + if let Some(bm) = &mut bm { + bm.and_inplace(&row_ids); + if bm.is_empty() { + return None; + } + } else { + bm = Some(row_ids); + } + } + None => { + return None; + } // if this predicate doesn't match then no rows match + } + } else { + return None; // column doesn't exist - no matching rows + } + } + + // In this case there are no predicates provided and we have no time + // range restrictions - we need to return a bitset for all row ids. + let mut bm = croaring::Bitmap::create_with_capacity(self.num_rows() as u32); + bm.add_range(0..self.num_rows() as u64); + Some(bm) } } /// Meta data for a segment. This data is mainly used to determine if a segment /// may contain value for answering a query. -#[derive(Debug, Default)] +#[derive(Debug)] pub struct SegmentMetaData { size: usize, // TODO rows: usize, column_names: Vec, time_range: (i64, i64), + + // row_ids is a bitmap containing all row ids. + row_ids: croaring::Bitmap, // TODO column sort order } impl SegmentMetaData { + pub fn new(rows: usize) -> Self { + let mut meta = Self { + size: 0, + rows, + column_names: vec![], + time_range: (0, 0), + row_ids: croaring::Bitmap::create_with_capacity(rows as u32), + }; + meta.row_ids.add_range(0..rows as u64); + meta + } + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { self.time_range.0 <= to && from <= self.time_range.1 } From 3fef4ff1106c1401b1e64416c304b9dc4a992f0f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 11 Aug 2020 11:31:35 +0100 Subject: [PATCH 16/73] feat: add equivalent of ReadFilter --- delorean_mem_qe/src/bin/main.rs | 131 ++++++++++++++++++------------ delorean_mem_qe/src/column.rs | 115 ++++++++++++++++++++------ delorean_mem_qe/src/encoding.rs | 140 +++++++++++++++++++++++--------- delorean_mem_qe/src/segment.rs | 121 +++++++++++++++++---------- 4 files changed, 348 insertions(+), 159 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d553c72451..f02b246ad3 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -39,9 +39,9 @@ fn main() { // time_column_min_time(&store); // time_column_max_time(&store); // time_column_first(&store); - let segments = store.segments(); - let res = segments.last("host").unwrap(); - println!("{:?}", res); + // let segments = store.segments(); + // let res = segments.last("host").unwrap(); + // println!("{:?}", res); // let segments = segments // .filter_by_time(1590036110000000, 1590044410000000) @@ -85,35 +85,64 @@ fn main() { // println!("{:?}", rows.cardinality()); // time_row_by_preds(&store); - loop { - let mut total_count = 0.0; - let now = std::time::Instant::now(); - for segment in segments.segments() { - let (min, max) = segment.time_range(); - let time_ids = segment.filter_by_predicates_eq((min, max), vec![]).unwrap(); - let group_ids = segment.group_by_column_ids("env").unwrap(); - for (col_values, row_ids) in group_ids { - // filter ids by time - let mut result = row_ids.and(&time_ids); - // let - // println!( - // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", - // min, - // max, - // col_values, - // 
segment.sum_column(&"counter", &result), - // result.cardinality(), - // ); - if let column::Scalar::Float(x) = - segment.sum_column(&"counter", &mut result).unwrap() - { - total_count += x; - } - } - } - println!("Done ({:?}) in {:?}", total_count, now.elapsed()); + let segments = store.segments(); + let columns = segments.read_filter_eq( + (1590040770000000, 1590044410000000), + &[ + ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + ("method", Some(&column::Scalar::String("GET"))), + ( + "host", + Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + ), + ], + vec![ + "env".to_string(), + "method".to_string(), + "host".to_string(), + "counter".to_string(), + "time".to_string(), + ], + ); + + for (k, v) in columns { + println!("COLUMN {:?}", k); + println!("ROWS ({:?}) {:?}", v.len(), 0); + // println!("ROWS ({:?}) {:?}", v, v.len()); } + + // loop { + // let mut total_count = 0.0; + // let now = std::time::Instant::now(); + // for segment in segments.segments() { + // let (min, max) = segment.time_range(); + // let time_ids = segment + // .filter_by_predicates_eq((min, max), &vec![]) + // .unwrap(); + + // let group_ids = segment.group_by_column_ids("env").unwrap(); + // for (col_values, row_ids) in group_ids { + // // filter ids by time + // let mut result = row_ids.and(&time_ids); + // // let + // // println!( + // // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", + // // min, + // // max, + // // col_values, + // // segment.sum_column(&"counter", &result), + // // result.cardinality(), + // // ); + // if let column::Scalar::Float(x) = + // segment.sum_column(&"counter", &mut result).unwrap() + // { + // total_count += x; + // } + // } + // } + // println!("Done ({:?}) in {:?}", total_count, now.elapsed()); + // } } fn build_store( @@ -271,27 +300,27 @@ fn time_column_first(store: &Store) { ); } -fn time_row_by_last_ts(store: &Store) { - let repeat = 100000; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; - let segments = store.segments(); - for _ in 0..repeat { - let now = std::time::Instant::now(); +// fn time_row_by_last_ts(store: &Store) { +// let repeat = 100000; +// let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); +// let mut total_max = 0; +// let segments = store.segments(); +// for _ in 0..repeat { +// let now = std::time::Instant::now(); - let (_, _, row_id) = segments.last("time").unwrap(); - let res = segments.segments().last().unwrap().row(row_id).unwrap(); - total_time += now.elapsed(); - total_max += res.len(); - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); -} +// let (_, _, row_id) = segments.last("time").unwrap(); +// let res = segments.segments().last().unwrap().row(row_id).unwrap(); +// total_time += now.elapsed(); +// total_max += res.len(); +// } +// println!( +// "Ran {:?} in {:?} {:?} / call {:?}", +// repeat, +// total_time, +// total_time / repeat, +// total_max +// ); +// } fn time_row_by_preds(store: &Store) { let repeat = 100000; @@ -307,7 +336,7 @@ fn time_row_by_preds(store: &Store) { .unwrap() .filter_by_predicates_eq( (1590040770000000, 1590040790000000), - vec![ + &vec![ ("env", Some(&column::Scalar::String("prod01-us-west-2"))), ("method", Some(&column::Scalar::String("GET"))), ( diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 5b75e49d7f..be0809cbb6 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -12,8 
+12,47 @@ pub enum Scalar<'a> { #[derive(Debug)] pub enum Vector<'a> { String(Vec<&'a Option>), - Float(&'a [f64]), - Integer(&'a [i64]), + Float(Vec<&'a f64>), + Integer(Vec<&'a i64>), +} + +impl<'a> Vector<'a> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> usize { + match self { + Self::String(v) => v.len(), + Self::Float(v) => v.len(), + Self::Integer(v) => v.len(), + } + } + pub fn extend(&mut self, other: Self) { + match self { + Self::String(v) => { + if let Self::String(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Float(v) => { + if let Self::Float(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Integer(v) => { + if let Self::Integer(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + } + } } #[derive(Debug)] @@ -69,29 +108,45 @@ impl Column { } } - /// materialise all rows including and after row_id - pub fn scan_from(&self, row_id: usize) -> Option { - if row_id >= self.num_rows() { - println!( - "asking for {:?} but only got {:?} rows", - row_id, - self.num_rows() - ); - return None; - } - - println!( - "asking for {:?} with a column having {:?} rows", - row_id, - self.num_rows() + /// materialise rows for each row_id + pub fn rows(&self, row_ids: &[usize]) -> Vector { + assert!( + row_ids.len() == 1 || row_ids[row_ids.len() - 1] > row_ids[0], + "got last row_id={:?} and first row_id={:?}", + row_ids[row_ids.len() - 1], + row_ids[0] ); match self { - Column::String(c) => Some(Vector::String(c.scan_from(row_id))), - Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), - Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + Column::String(c) => Vector::String(c.values(row_ids)), + Column::Float(c) => Vector::Float(c.values(row_ids)), + Column::Integer(c) => Vector::Integer(c.values(row_ids)), } } + /// materialise all rows including and after row_id + pub fn scan_from(&self, row_id: usize) -> Option { + unimplemented!("todo"); + // if row_id >= self.num_rows() { + // println!( + // "asking for {:?} but only got {:?} rows", + // row_id, + // self.num_rows() + // ); + // return None; + // } + + // println!( + // "asking for {:?} with a column having {:?} rows", + // row_id, + // self.num_rows() + // ); + // match self { + // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), + // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), + // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + // } + } + /// Given the provided row_id scans the column until a non-null value found /// or the column is exhausted. pub fn scan_from_until_some(&self, row_id: usize) -> Option { @@ -299,8 +354,8 @@ impl Column { Column::Float(c) => { let (col_min, col_max) = c.meta.range(); if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { - if *low >= col_min && *high < col_max { - // In this case the column completely covers the range. + if *low <= col_min && *high > col_max { + // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" // and "all values" in the context of an Option. 
Right now @@ -322,8 +377,8 @@ impl Column { Column::Integer(c) => { let (col_min, col_max) = c.meta.range(); if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { - if *low >= col_min && *high < col_max { - // In this case the column completely covers the range. + if *low <= col_min && *high > col_max { + // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" // and "all values" in the context of an Option. Right now @@ -428,6 +483,10 @@ impl String { self.data.value(row_id) } + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + self.data.values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { self.data.scan_from(row_id) } @@ -466,6 +525,10 @@ impl Float { self.data.value(row_id) } + pub fn values(&self, row_ids: &[usize]) -> Vec<&f64> { + self.data.values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[f64] { self.data.scan_from(row_id) } @@ -519,6 +582,10 @@ impl Integer { self.data.value(row_id) } + pub fn values(&self, row_ids: &[usize]) -> Vec<&i64> { + self.data.values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[i64] { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index a575e240a0..c2d0052f3e 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -43,6 +43,14 @@ where self.values[row_id] } + pub fn values(&self, row_ids: &[usize]) -> Vec<&T> { + let mut out = Vec::with_capacity(row_ids.len()); + for row_id in row_ids { + out.push(&self.values[*row_id]); + } + out + } + // TODO(edd): fix this when added NULL support pub fn scan_from_until_some(&self, row_id: usize) -> Option { unreachable!("to remove"); @@ -392,6 +400,61 @@ impl DictionaryRLE { None } + // materialises a vector of references to logical values in the + // encoding for each provided row_id. + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + let mut out: Vec<&Option> = Vec::with_capacity(row_ids.len()); + + let mut curr_logical_row_id = 0; + + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_entry_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + for wanted_row_id in row_ids { + while curr_logical_row_id + curr_entry_rl <= *wanted_row_id as u64 { + // this encoded entry does not cover the row we need. + // move on to next entry + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_entry_id = res.0; + curr_entry_rl = res.1; + } + None => panic!("shouldn't get here"), + } + } + + // this encoded entry covers the row_id we want. + let value = self.index_entry.get(&curr_entry_id).unwrap(); + out.push(value); + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + assert_eq!(row_ids.len(), out.len()); + out + } + + // values materialises a vector of references to all logical values in the + // encoding. + pub fn all_values(&mut self) -> Vec> { + let mut out: Vec> = Vec::with_capacity(self.total as usize); + + // build reverse mapping. + let mut idx_value = BTreeMap::new(); + for (k, v) in &self.entry_index { + idx_value.insert(v, k); + } + assert_eq!(idx_value.len(), self.entry_index.len()); + + for (idx, rl) in &self.run_lengths { + // TODO(edd): fix unwrap - we know that the value exists in map... 
+ let v = idx_value.get(&idx).unwrap().as_ref(); + out.extend(iter::repeat(v).take(*rl as usize)); + } + out + } + // materialise a slice of rows starting from index. pub fn scan_from(&self, index: usize) -> Vec<&Option> { let mut result = vec![]; @@ -428,44 +491,6 @@ impl DictionaryRLE { result } - // // get the logical value at the provided index, or scan to the next value - // // that is non-null. - // pub fn scan_from_until_some(&self, index: usize) -> Option<&String> { - // if index < self.total as usize { - // let mut total = 0; - // for (idx, rl) in &self.run_lengths { - // if total + rl > index as u64 { - // // If there is a value then return otherwise continue. - // if let Some(v) = self.index_entry.get(idx) { - // return v.as_ref(); - // } - // } - // total += rl; - // } - // } - // None - // } - - // values materialises a vector of references to all logical values in the - // encoding. - pub fn values(&mut self) -> Vec> { - let mut out: Vec> = Vec::with_capacity(self.total as usize); - - // build reverse mapping. - let mut idx_value = BTreeMap::new(); - for (k, v) in &self.entry_index { - idx_value.insert(v, k); - } - assert_eq!(idx_value.len(), self.entry_index.len()); - - for (idx, rl) in &self.run_lengths { - // TODO(edd): fix unwrap - we know that the value exists in map... - let v = idx_value.get(&idx).unwrap().as_ref(); - out.extend(iter::repeat(v).take(*rl as usize)); - } - out - } - pub fn size(&self) -> usize { // mapping and reverse mapping then the rles 2 * self.map_size + self.run_length_size @@ -571,7 +596,7 @@ mod test { drle.push_additional(Some("hello".to_string()), 1); assert_eq!( - drle.values(), + drle.all_values(), [ Some(&"hello".to_string()), Some(&"hello".to_string()), @@ -584,7 +609,7 @@ mod test { drle.push_additional(Some("zoo".to_string()), 3); assert_eq!( - drle.values(), + drle.all_values(), [ Some(&"hello".to_string()), Some(&"hello".to_string()), @@ -670,6 +695,41 @@ mod test { assert_eq!(results, exp); } + #[test] + fn dict_rle_values() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(east.clone(), 2); + drle.push_additional(north.clone(), 4); + drle.push_additional(west.clone(), 3); + + let results = drle.values(&[0, 1, 4, 5]); + + // w,w,w,e,e,n,n,n,n,w, w, w + // 0 1 2 3 4 5 6 7 8 9 10 11 + let exp = vec![&west, &west, &east, &north]; + assert_eq!(results, exp); + + let results = drle.values(&[10, 11]); + let exp = vec![&west, &west]; + assert_eq!(results, exp); + + let results = drle.values(&[0, 3, 5, 11]); + let exp = vec![&west, &east, &north, &west]; + assert_eq!(results, exp); + + let results = drle.values(&[0]); + let exp = vec![&west]; + assert_eq!(results, exp); + + let results = drle.values(&[0, 9]); + let exp = vec![&west, &west]; + assert_eq!(results, exp); + } + #[test] fn rle_dict_row_ids() { let mut drle = super::DictionaryRLE::new(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index aad3153c95..495bc8c62b 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -103,17 +103,32 @@ impl Segment { None } - pub fn row(&self, row_id: usize) -> Option>> { - if row_id >= self.num_rows() { - return None; + // Materialise all rows for each desired column. `rows` expects `row_ids` to + // be ordered in ascending order. + // + // `columns` determines which column values are returned. 
An empty `columns` + // value will result in rows for all columns being returned. + pub fn rows(&self, row_ids: &[usize], columns: &[String]) -> BTreeMap { + let mut rows: BTreeMap = BTreeMap::new(); + if row_ids.is_empty() { + // nothing to return + return rows; } - Some( - self.columns - .iter() - .map(|c| c.value(row_id)) - .collect::>>(), - ) + let cols_to_process = if columns.is_empty() { + &self.meta.column_names + } else { + columns + }; + + for col_name in cols_to_process { + let column = self.column(col_name.as_str()); + if let Some(column) = column { + rows.insert(col_name.clone(), column.rows(row_ids)); + }; + } + + rows } pub fn group_by_column_ids( @@ -136,20 +151,19 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option<&column::Scalar>)>, + predicates: &[(&str, Option<&column::Scalar>)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range } let (seg_min, seg_max) = self.meta.time_range; - if seg_min <= time_range.0 && seg_max >= time_range.1 { - // the segment completely overlaps the time range of query so don't - // need to intersect with time column. + if time_range.0 <= seg_min && time_range.1 > seg_max { + // the segment is completely overlapped by the time range of query, + // so don't need to intersect predicate results with time column. return self.filter_by_predicates_eq_no_time(predicates); } - - self.filter_by_predicates_eq_time(time_range, predicates) + self.filter_by_predicates_eq_time(time_range, predicates.to_vec()) } fn filter_by_predicates_eq_time( @@ -193,13 +207,22 @@ impl Segment { // meta row_ids bitmap. fn filter_by_predicates_eq_no_time( &self, - predicates: Vec<(&str, Option<&column::Scalar>)>, + predicates: &[(&str, Option<&column::Scalar>)], ) -> Option { + if predicates.is_empty() { + // In this case there are no predicates provided and we have no time + // range restrictions - we need to return a bitset for all row ids. + let mut bm = croaring::Bitmap::create_with_capacity(self.num_rows() as u32); + bm.add_range(0..self.num_rows() as u64); + return Some(bm); + } + let mut bm: Option = None; // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(col_pred_value) { + // TODO(edd): rework this clone + match c.row_ids_eq(*col_pred_value) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -222,12 +245,7 @@ impl Segment { return None; // column doesn't exist - no matching rows } } - - // In this case there are no predicates provided and we have no time - // range restrictions - we need to return a bitset for all row ids. - let mut bm = croaring::Bitmap::create_with_capacity(self.num_rows() as u32); - bm.add_range(0..self.num_rows() as u64); - Some(bm) + bm } } @@ -295,29 +313,44 @@ impl<'a> Segments<'a> { Self::new(segments) } - // pub fn filter_by_predicate_eq( - // &self, - // time_range: Option<(i64, i64)>, - // predicates: Vec<(&str, &column::Scalar)>, - // ) -> Option { - // let bm = None; - // for segment in self.segments { - // if let Some((min, max)) = time_range { - // if !segment.meta.overlaps_time_range(min, max) { - // continue; // segment doesn't have time range - // } - // } + // read_filter_eq returns rows of data for the desired columns. Results may + // be filtered by (currently) equality predicates and ranged by time. 
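The rows() materialisation above turns a set of matching row ids into per-column value vectors, which is the shape read_filter_eq then merges across segments. A reduced sketch with columns modelled as named f64 vectors (the patch's Vector enum generalises this across string/float/integer):

use std::collections::BTreeMap;

// Materialise the selected columns for an ascending list of row ids.
fn rows(
    columns: &BTreeMap<String, Vec<f64>>,
    row_ids: &[usize],
    select: &[String],
) -> BTreeMap<String, Vec<f64>> {
    let mut out = BTreeMap::new();
    if row_ids.is_empty() {
        return out; // nothing matched; nothing to materialise
    }
    for name in select {
        if let Some(col) = columns.get(name) {
            out.insert(name.clone(), row_ids.iter().map(|&id| col[id]).collect());
        }
    }
    out
}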
+ pub fn read_filter_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + select_columns: Vec, + ) -> BTreeMap { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } - // // build set of + let mut columns: BTreeMap = BTreeMap::new(); + for segment in &self.segments { + if !segment.meta.overlaps_time_range(min, max) { + continue; // segment doesn't have time range + } - // if let Some(col) = segment.column(column_name) { - // if col.maybe_contains(&value) { - // segments.push(segment); - // } - // } - // } - // Self::new(segments) - // } + if let Some(bm) = segment.filter_by_predicates_eq(time_range, predicates) { + let bm_vec = bm.to_vec(); + let row_ids = bm_vec.iter().map(|v| *v as usize).collect::>(); + + let rows = segment.rows(&row_ids, &select_columns); + for (k, v) in rows { + let segment_values = columns.get_mut(&k); + match segment_values { + Some(values) => values.extend(v), + None => { + columns.insert(k.to_owned(), v); + } + } + } + }; + } + + columns + } /// Returns the minimum value for a column in a set of segments. pub fn column_min(&self, column_name: &str) -> Option { From 2387b7c8498972e72380d729525d1f461dce094f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 11 Aug 2020 19:59:30 +0100 Subject: [PATCH 17/73] feat: add support for group by aggregate --- Cargo.lock | 1 + delorean_mem_qe/Cargo.toml | 1 + delorean_mem_qe/src/bin/main.rs | 93 +++++++++++++++------ delorean_mem_qe/src/column.rs | 76 +++++++++++++++++- delorean_mem_qe/src/segment.rs | 138 +++++++++++++++++++++++++++++++- 5 files changed, 281 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 328fb59d2e..effb3598b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -765,6 +765,7 @@ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "chrono", "croaring", "delorean_table", "snafu", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index aaf38f1b7a..32531e888b 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -11,6 +11,7 @@ delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } snafu = "0.6.8" croaring = "0.4.5" +chrono = "0.4" [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index f02b246ad3..8c7a89c1f0 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -5,7 +5,7 @@ use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; use delorean_mem_qe::column::{Column, Scalar}; -use delorean_mem_qe::segment::Segment; +use delorean_mem_qe::segment::{Aggregate, Segment}; use delorean_mem_qe::Store; // use snafu::ensure; @@ -36,6 +36,8 @@ fn main() { store.size(), ); + time_group_by_agg(&store); + // time_column_min_time(&store); // time_column_max_time(&store); // time_column_first(&store); @@ -86,31 +88,37 @@ fn main() { // time_row_by_preds(&store); - let segments = store.segments(); - let columns = segments.read_filter_eq( - (1590040770000000, 1590044410000000), - &[ - ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - ("method", Some(&column::Scalar::String("GET"))), - ( - "host", - Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - ), - ], - vec![ - "env".to_string(), - "method".to_string(), - "host".to_string(), - 
"counter".to_string(), - "time".to_string(), - ], - ); + // let segments = store.segments(); + // let columns = segments.read_filter_eq( + // (1590036110000000, 1590040770000000), + // &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], + // vec![ + // "env".to_string(), + // "method".to_string(), + // "host".to_string(), + // "counter".to_string(), + // "time".to_string(), + // ], + // ); - for (k, v) in columns { - println!("COLUMN {:?}", k); - println!("ROWS ({:?}) {:?}", v.len(), 0); - // println!("ROWS ({:?}) {:?}", v, v.len()); - } + // for (k, v) in columns { + // println!("COLUMN {:?}", k); + // // println!("ROWS ({:?}) {:?}", v.len(), 0); + // println!("ROWS ({}) {:?}", v, v.len()); + // } + + // let now = std::time::Instant::now(); + // let segments = store.segments(); + // let groups = segments.read_group_eq( + // (0, 1590044410000000), + // &[], + // vec!["env".to_string()], + // vec![ + // // ("counter".to_string(), Aggregate::Sum), + // ("counter".to_string(), Aggregate::Count), + // ], + // ); + // println!("{:?} {:?}", groups, now.elapsed()); // loop { // let mut total_count = 0.0; @@ -149,7 +157,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { + // let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { + // if i < 363 { + // i += 1; + // continue; + // } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -366,3 +379,33 @@ fn time_row_by_preds(store: &Store) { total_max ); } + +fn time_group_by_agg(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (0, 1590044410000000), + &[("method", Some(&column::Scalar::String("GET")))], + vec!["env".to_string()], + vec![ + ("counter".to_string(), Aggregate::Sum), + // ("counter".to_string(), Aggregate::Count), + ], + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index be0809cbb6..4e55ed8196 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -2,14 +2,68 @@ use std::convert::From; use super::encoding; -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Scalar<'a> { String(&'a str), Float(f64), Integer(i64), } -#[derive(Debug)] +impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { + type Output = Scalar<'a>; + + fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + match self { + Self::Float(v) => { + if let Self::Float(other) = _rhs { + return Self::Float(v + other); + } else { + panic!("invalid"); + }; + } + Self::Integer(v) => { + if let Self::Integer(other) = _rhs { + return Self::Integer(v + other); + } else { + panic!("invalid"); + }; + } + Self::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + +#[derive(Clone, Debug)] +pub enum Aggregate<'a> { + Count(u64), + Sum(Scalar<'a>), +} + +impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { + type Output = Aggregate<'a>; + + fn add(self, _rhs: &Aggregate<'a>) -> Self::Output { + match self { + Self::Count(c) => { + if let Self::Count(other) = _rhs { + return Self::Count(c + other); + } else { + panic!("invalid"); + }; + } + 
Self::Sum(s) => { + if let Self::Sum(other) = _rhs { + return Self::Sum(s + other); + } else { + panic!("invalid"); + }; + } + } + } +} + pub enum Vector<'a> { String(Vec<&'a Option>), Float(Vec<&'a f64>), @@ -55,6 +109,24 @@ impl<'a> Vector<'a> { } } +use chrono::prelude::*; + +impl<'a> std::fmt::Display for Vector<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(v) => write!(f, "{:?}", v), + Self::Float(v) => write!(f, "{:?}", v), + Self::Integer(v) => { + for x in v { + let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); + write!(f, "{}, ", ts)?; + } + Ok(()) + } + } + } +} + #[derive(Debug)] pub enum Column { String(String), diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 495bc8c62b..e578578b7e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -148,6 +148,18 @@ impl Segment { None } + // Returns the count aggregate for a given column name. + // + // Since we guarantee to provide row ids for the segment, and all columns + // have the same number of logical rows, the count is just the number of + // requested logical rows. + pub fn count_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { + if self.column(name).is_some() { + return Some(row_ids.cardinality() as u64); + } + None + } + pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), @@ -247,6 +259,72 @@ impl Segment { } bm } + + pub fn group_agg_by_predicate_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &Vec, + aggregates: &Vec<(String, Aggregate)>, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + let mut grouped_results = BTreeMap::new(); + + let filter_row_ids: croaring::Bitmap; + match self.filter_by_predicates_eq(time_range, predicates) { + Some(row_ids) => filter_row_ids = row_ids, + None => { + return grouped_results; + } + } + + if let Some(grouped_row_ids) = self.group_by_column_ids(&group_columns[0]) { + for (group_key_value, row_ids) in grouped_row_ids.iter() { + let mut filtered_row_ids = row_ids.and(&filter_row_ids); + if !filtered_row_ids.is_empty() { + // First calculate all of the aggregates for this grouped value + let mut aggs: Vec<((String, Aggregate), column::Aggregate)> = + Vec::with_capacity(aggregates.len()); + + for (col_name, agg) in aggregates { + match &agg { + Aggregate::Sum => { + aggs.push(( + (col_name.to_string(), agg.clone()), + column::Aggregate::Sum( + self.sum_column(col_name, &mut filtered_row_ids).unwrap(), + ), // assuming no non-null group keys + )); + } + Aggregate::Count => { + aggs.push(( + (col_name.to_string(), agg.clone()), + column::Aggregate::Count( + self.count_column(col_name, &mut filtered_row_ids).unwrap(), + ), // assuming no non-null group keys + )); + } + } + } + + // Next add these aggregates to the result set, keyed + // by the grouped value. + assert_eq!(aggs.len(), aggregates.len()); + grouped_results.insert(vec![group_key_value.clone().unwrap()], aggs); + } else { + // In this case there are grouped values in the column with no + // rows falling into time-range/predicate set. + println!( + "grouped value {:?} has no rows in time-range/predicate set", + group_key_value + ); + } + } + } else { + // segment doesn't have the column so can't group on it. + println!("don't have column - can't group"); + } + grouped_results + } } /// Meta data for a segment. 
This data is mainly used to determine if a segment @@ -282,6 +360,12 @@ impl SegmentMetaData { } } +#[derive(Debug, Clone)] +pub enum Aggregate { + Count, + Sum, +} + pub struct Segments<'a> { segments: Vec<&'a Segment>, } @@ -331,7 +415,6 @@ impl<'a> Segments<'a> { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range } - if let Some(bm) = segment.filter_by_predicates_eq(time_range, predicates) { let bm_vec = bm.to_vec(); let row_ids = bm_vec.iter().map(|v| *v as usize).collect::>(); @@ -352,6 +435,59 @@ impl<'a> Segments<'a> { columns } + // read_group_eq returns grouped aggregates of for the specified columns. + // Results may be filtered by (currently) equality predicates and ranged + // by time. + pub fn read_group_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, Aggregate)>, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + // TODO(edd): support multi column groups + assert_eq!(group_columns.len(), 1); + + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + BTreeMap::new(); + + for segment in &self.segments { + let segment_results = segment.group_agg_by_predicate_eq( + time_range, + predicates, + &group_columns, + &aggregates, + ); + + for (k, segment_aggs) in segment_results { + // assert_eq!(v.len(), aggregates.len()); + let cum_result = cum_results.get_mut(&k); + match cum_result { + Some(cum) => { + assert_eq!(cum.len(), segment_aggs.len()); + // In this case we need to aggregate the aggregates from + // each segment. + for i in 0..cum.len() { + // TODO(edd): this is more expensive than necessary + cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); + } + } + None => { + cum_results.insert(k, segment_aggs); + } + } + } + } + + // columns + cum_results + } + /// Returns the minimum value for a column in a set of segments. 
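// [Editor's sketch] `overlaps_time_range(min, max)` is used above to skip
// segments that cannot contain matching rows. A hedged sketch of such a
// predicate, assuming closed intervals on both ends; the real method's
// boundary semantics may differ.
fn overlaps(seg_min: i64, seg_max: i64, query_min: i64, query_max: i64) -> bool {
    // two intervals overlap iff each one starts before the other ends.
    seg_min <= query_max && query_min <= seg_max
}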
pub fn column_min(&self, column_name: &str) -> Option { if self.segments.is_empty() { From 0d5b6489804c8fc828ab8550c6b2e7aec3e3ca6e Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 12 Aug 2020 11:45:33 +0100 Subject: [PATCH 18/73] feat: add support for returning encoded values --- delorean_mem_qe/src/bin/main.rs | 12 ++--- delorean_mem_qe/src/column.rs | 72 ++++++++++++++++++++++----- delorean_mem_qe/src/encoding.rs | 88 +++++++++++++++++++++++++++++++-- delorean_mem_qe/src/segment.rs | 45 ++++++++++++++--- 4 files changed, 186 insertions(+), 31 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 8c7a89c1f0..6efffbf8b5 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -157,12 +157,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - // let mut i = 0; + let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - // if i < 363 { - // i += 1; - // continue; - // } + if i < 364 { + i += 1; + continue; + } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -391,7 +391,7 @@ fn time_group_by_agg(store: &Store) { let groups = segments.read_group_eq( (0, 1590044410000000), &[("method", Some(&column::Scalar::String("GET")))], - vec!["env".to_string()], + vec!["env".to_string(), "status".to_string()], vec![ ("counter".to_string(), Aggregate::Sum), // ("counter".to_string(), Aggregate::Count), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 4e55ed8196..8375708e91 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -66,8 +66,8 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { pub enum Vector<'a> { String(Vec<&'a Option>), - Float(Vec<&'a f64>), - Integer(Vec<&'a i64>), + Float(Vec), + Integer(Vec), } impl<'a> Vector<'a> { @@ -117,7 +117,7 @@ impl<'a> std::fmt::Display for Vector<'a> { Self::String(v) => write!(f, "{:?}", v), Self::Float(v) => write!(f, "{:?}", v), Self::Integer(v) => { - for x in v { + for x in v.iter() { let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); write!(f, "{}, ", ts)?; } @@ -153,6 +153,8 @@ impl Column { } } + /// Materialise all of the decoded values matching the provided logical + /// row ids. pub fn value(&self, row_id: usize) -> Option { match self { Column::String(c) => { @@ -180,18 +182,50 @@ impl Column { } } + /// Materialise all of the encoded values matching the provided logical + /// row ids. 
+ pub fn encoded_values(&self, row_ids: &croaring::Bitmap) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Integer(c.encoded_values(&row_id_vec)) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Float(c.encoded_values(&row_id_vec)) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Integer(c.encoded_values(&row_id_vec)) + } + } + } + /// materialise rows for each row_id - pub fn rows(&self, row_ids: &[usize]) -> Vector { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { + let row_ids_vec = row_ids.to_vec(); assert!( - row_ids.len() == 1 || row_ids[row_ids.len() - 1] > row_ids[0], + row_ids_vec.len() == 1 || row_ids_vec[row_ids_vec.len() - 1] > row_ids_vec[0], "got last row_id={:?} and first row_id={:?}", - row_ids[row_ids.len() - 1], - row_ids[0] + row_ids_vec[row_ids_vec.len() - 1], + row_ids_vec[0] ); match self { - Column::String(c) => Vector::String(c.values(row_ids)), - Column::Float(c) => Vector::Float(c.values(row_ids)), - Column::Integer(c) => Vector::Integer(c.values(row_ids)), + Column::String(c) => Vector::String(c.values(&row_ids_vec)), + Column::Float(c) => Vector::Float(c.values(&row_ids_vec)), + Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), } } @@ -555,10 +589,14 @@ impl String { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { self.data.values(row_ids) } + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.data.encoded_values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { self.data.scan_from(row_id) } @@ -597,10 +635,14 @@ impl Float { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec<&f64> { + pub fn values(&self, row_ids: &[u32]) -> Vec { self.data.values(row_ids) } + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.data.encoded_values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[f64] { self.data.scan_from(row_id) } @@ -654,10 +696,14 @@ impl Integer { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec<&i64> { + pub fn values(&self, row_ids: &[u32]) -> Vec { self.data.values(row_ids) } + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.data.encoded_values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[i64] { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index c2d0052f3e..d159a557f7 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -43,14 +43,21 @@ where self.values[row_id] } - pub fn values(&self, row_ids: &[usize]) -> Vec<&T> { + /// Return the decoded values for the provided logical row ids. + pub fn values(&self, row_ids: &[u32]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); for row_id in row_ids { - out.push(&self.values[*row_id]); + out.push(self.values[*row_id as usize]); } out } + /// Return the raw encoded values for the provided logical row ids. For Plain + /// encoding this is just the decoded values. 
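// [Editor's sketch] The arms above all follow the same shape: bail out early on
// an empty bitmap, then materialise the matching row ids once with `to_vec()`
// and hand the resulting &[u32] to the typed column. A tiny usage sketch of
// that roaring-bitmap round trip (croaring 0.4 API, as already used in this
// crate):
fn matching_row_ids() -> Vec<u32> {
    let mut bm = croaring::Bitmap::create();
    bm.add_range(10..14); // rows 10, 11, 12, 13 match some predicate
    if bm.is_empty() {
        return vec![];
    }
    bm.to_vec() // -> vec![10, 11, 12, 13]
}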
+ pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.values(row_ids) + } + // TODO(edd): fix this when added NULL support pub fn scan_from_until_some(&self, row_id: usize) -> Option { unreachable!("to remove"); @@ -400,9 +407,9 @@ impl DictionaryRLE { None } - // materialises a vector of references to logical values in the - // encoding for each provided row_id. - pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + // materialises a vector of references to the decoded values in the + // each provided row_id. + pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { let mut out: Vec<&Option> = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -435,6 +442,42 @@ impl DictionaryRLE { out } + /// Return the raw encoded values for the provided logical row ids. + /// + /// TODO(edd): return type is wrong but I'm making it fit + /// + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + let mut out: Vec = Vec::with_capacity(row_ids.len()); + + let mut curr_logical_row_id = 0; + + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_entry_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + for wanted_row_id in row_ids { + while curr_logical_row_id + curr_entry_rl <= *wanted_row_id as u64 { + // this encoded entry does not cover the row we need. + // move on to next entry + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_entry_id = res.0; + curr_entry_rl = res.1; + } + None => panic!("shouldn't get here"), + } + } + + // this entry covers the row_id we want. + out.push(curr_entry_id as i64); + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + assert_eq!(row_ids.len(), out.len()); + out + } + // values materialises a vector of references to all logical values in the // encoding. pub fn all_values(&mut self) -> Vec> { @@ -730,6 +773,41 @@ mod test { assert_eq!(results, exp); } + #[test] + fn dict_rle_encoded_values() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(east.clone(), 2); + drle.push_additional(north.clone(), 4); + drle.push_additional(west.clone(), 3); + + let results = drle.encoded_values(&[0, 1, 4, 5]); + + // w,w,w,e,e,n,n,n,n,w,w,w + // 0,0,0,1,1,2,2,2,2,0,0,0 + let exp = vec![0, 0, 1, 2]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[10, 11]); + let exp = vec![0, 0]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[0, 3, 5, 11]); + let exp = vec![0, 1, 2, 0]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[0]); + let exp = vec![0]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[0, 9]); + let exp = vec![0, 0]; + assert_eq!(results, exp); + } + #[test] fn rle_dict_row_ids() { let mut drle = super::DictionaryRLE::new(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index e578578b7e..6871dc9e20 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -103,12 +103,15 @@ impl Segment { None } - // Materialise all rows for each desired column. `rows` expects `row_ids` to - // be ordered in ascending order. + // Materialise all rows for each desired column. // // `columns` determines which column values are returned. An empty `columns` // value will result in rows for all columns being returned. 
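// [Editor's sketch] `encoded_values` above walks the dictionary's
// (entry_id, run_length) pairs. For context, here is a minimal encoder that
// produces that layout from a stream of already-dictionary-mapped ids;
// `rle_encode` is an illustrative stand-in for the bookkeeping that
// `push_additional` performs.
fn rle_encode(ids: &[usize]) -> Vec<(usize, u64)> {
    let mut runs: Vec<(usize, u64)> = Vec::new();
    for &id in ids {
        match runs.last_mut() {
            // same id as the current run: just lengthen it.
            Some((last, run)) if *last == id => *run += 1,
            // id changed (or first value): start a new run.
            _ => runs.push((id, 1)),
        }
    }
    runs // e.g. [0, 0, 0, 1, 1] -> [(0, 3), (1, 2)]
}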
- pub fn rows(&self, row_ids: &[usize], columns: &[String]) -> BTreeMap { + pub fn rows( + &self, + row_ids: &croaring::Bitmap, + columns: &[String], + ) -> BTreeMap { let mut rows: BTreeMap = BTreeMap::new(); if row_ids.is_empty() { // nothing to return @@ -141,6 +144,37 @@ impl Segment { None } + pub fn aggregate_by_groups( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: &Vec<(String, Aggregate)>, + ) -> BTreeMap, Vec<(String, Aggregate)>> { + // Build a hash table - essentially, scan columns for matching row ids, + // emitting the encoded value for each column and track those value + // combinations in a hashmap with running aggregates. + + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + + // materialise all encoded values for the matching rows. + // let mut column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + // if let Some(column) = self.column(&group_column) { + // column_encoded_values.push(Some(column.encoded_values(&filtered_row_ids)); + // } else { + // column_encoded_values.push(None); + // } + } + BTreeMap::new() + } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -416,10 +450,7 @@ impl<'a> Segments<'a> { continue; // segment doesn't have time range } if let Some(bm) = segment.filter_by_predicates_eq(time_range, predicates) { - let bm_vec = bm.to_vec(); - let row_ids = bm_vec.iter().map(|v| *v as usize).collect::>(); - - let rows = segment.rows(&row_ids, &select_columns); + let rows = segment.rows(&bm, &select_columns); for (k, v) in rows { let segment_values = columns.get_mut(&k); match segment_values { From 3df79a675d15ab08fcfc00dee8155b2ad17b0d43 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 10:07:34 +0100 Subject: [PATCH 19/73] feat: multi-group hash --- delorean_mem_qe/src/bin/main.rs | 38 +++--- delorean_mem_qe/src/column.rs | 121 +++++++++++++++++ delorean_mem_qe/src/segment.rs | 231 ++++++++++++++++++++++++++------ 3 files changed, 331 insertions(+), 59 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 6efffbf8b5..e6bd236c26 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -36,7 +36,7 @@ fn main() { store.size(), ); - time_group_by_agg(&store); + // time_group_by_agg(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -107,18 +107,20 @@ fn main() { // println!("ROWS ({}) {:?}", v, v.len()); // } - // let now = std::time::Instant::now(); - // let segments = store.segments(); - // let groups = segments.read_group_eq( - // (0, 1590044410000000), - // &[], - // vec!["env".to_string()], - // vec![ - // // ("counter".to_string(), Aggregate::Sum), - // ("counter".to_string(), Aggregate::Count), - // ], - // ); - // println!("{:?} {:?}", groups, now.elapsed()); + loop { + let now = std::time::Instant::now(); + let segments = store.segments(); + let groups = segments.read_group_eq( + (0, 1590044410000000), + &[], + vec!["env".to_string(), "status".to_string()], + vec![ + ("counter".to_string(), Aggregate::Sum), + // ("counter".to_string(), Aggregate::Count), + ], + ); + println!("{:?} {:?}", groups, now.elapsed()); + } // loop { // let mut 
total_count = 0.0; @@ -157,12 +159,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - let mut i = 0; + // let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - if i < 364 { - i += 1; - continue; - } + // if i < 364 { + // i += 1; + // continue; + // } let segment = convert_record_batch(rb)?; store.add_segment(segment); } diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 8375708e91..ec62fd8104 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -35,6 +35,30 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { } } +impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { + fn add_assign(&mut self, _rhs: &Scalar<'a>) { + match self { + Self::Float(v) => { + if let Self::Float(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Self::Integer(v) => { + if let Self::Integer(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Self::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + #[derive(Clone, Debug)] pub enum Aggregate<'a> { Count(u64), @@ -64,6 +88,35 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { } } +// impl<'a> std::ops::Add<&Scalar<'a>> for Aggregate<'a> { +// type Output = Aggregate<'a>; + +// fn add(self, _rhs: &Scalar<'a>) -> Self::Output { +// match _rhs { +// Scalar::String(v) => {} +// Scalar::Float(v) => {} +// Scalar::Integer(v) => {} +// } +// // match self { +// // Self::Count(c) => { +// // match +// // if let Scalar::Count(other) = _rhs { +// // return Self::Count(c + other); +// // } else { +// // panic!("invalid"); +// // }; +// // } +// // Self::Sum(s) => { +// // if let Self::Sum(other) = _rhs { +// // return Self::Sum(s + other); +// // } else { +// // panic!("invalid"); +// // }; +// // } +// // } +// } +// } + pub enum Vector<'a> { String(Vec<&'a Option>), Float(Vec), @@ -82,6 +135,16 @@ impl<'a> Vector<'a> { Self::Integer(v) => v.len(), } } + + pub fn get(&self, i: usize) -> Scalar<'a> { + match self { + // FIXME(edd): SORT THIS OPTION OUT + Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), + Self::Float(v) => Scalar::Float(v[i]), + Self::Integer(v) => Scalar::Integer(v[i]), + } + } + pub fn extend(&mut self, other: Self) { match self { Self::String(v) => { @@ -109,6 +172,33 @@ impl<'a> Vector<'a> { } } +/// VectorIterator allows a `Vector` to be iterated. Until vectors are drained +/// Scalar values are emitted. +pub struct VectorIterator<'a> { + v: Vector<'a>, + next_i: usize, +} + +impl<'a> VectorIterator<'a> { + pub fn new(v: Vector<'a>) -> Self { + Self { v, next_i: 0 } + } +} +impl<'a> Iterator for VectorIterator<'a> { + type Item = Scalar<'a>; + + fn next(&mut self) -> Option { + let curr_i = self.next_i; + self.next_i += 1; + + if curr_i == self.v.len() { + return None; + } + + Some(self.v.get(curr_i)) + } +} + use chrono::prelude::*; impl<'a> std::fmt::Display for Vector<'a> { @@ -182,6 +272,37 @@ impl Column { } } + /// Materialise all of the decoded values matching the provided logical + /// row ids. 
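// [Editor's sketch] The benchmarks in main.rs above repeat a query in a loop
// and report total and per-call elapsed time. The same harness, factored out;
// `time_calls` is an illustrative helper, not part of the crate.
fn time_calls<F: FnMut()>(repeat: u32, mut f: F) -> std::time::Duration {
    let now = std::time::Instant::now();
    for _ in 0..repeat {
        f(); // the query under test; results are usually accumulated so the
             // optimiser cannot discard the work.
    }
    now.elapsed() / repeat // mean duration per call, as printed above
}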
+ pub fn values(&self, row_ids: &croaring::Bitmap) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::String(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::String(c.values(&row_id_vec)) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Float(c.values(&row_id_vec)) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Integer(c.values(&row_id_vec)) + } + } + } + /// Materialise all of the encoded values matching the provided logical /// row ids. pub fn encoded_values(&self, row_ids: &croaring::Bitmap) -> Vector { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 6871dc9e20..f4a9d2c1fa 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use super::column; use super::column::Column; @@ -148,9 +148,10 @@ impl Segment { &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, - aggregates: &Vec<(String, Aggregate)>, - ) -> BTreeMap, Vec<(String, Aggregate)>> { + group_columns: &[String], + aggregates: &[(String, Aggregate)], + ) -> BTreeMap, Vec<(String, Option)>> { + // println!("working segment {:?}", time_range); // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -162,16 +163,163 @@ impl Segment { } else { return BTreeMap::new(); } + let total_rows = &filtered_row_ids.cardinality(); + // println!("TOTAL FILTERED ROWS {:?}", total_rows); - // materialise all encoded values for the matching rows. - // let mut column_encoded_values = Vec::with_capacity(group_columns.len()); + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { - // if let Some(column) = self.column(&group_column) { - // column_encoded_values.push(Some(column.encoded_values(&filtered_row_ids)); - // } else { - // column_encoded_values.push(None); - // } + if let Some(column) = self.column(&group_column) { + let encoded_values: Vec; + if let column::Vector::Integer(vector) = column.encoded_values(&filtered_row_ids) { + encoded_values = vector; + } else { + unimplemented!("currently you can only group on encoded string columns"); + } + + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + group_column_encoded_values.push(Some(encoded_values)); + } else { + group_column_encoded_values.push(None); + } } + // println!("grouped columns {:?}", group_column_encoded_values); + + // TODO(edd): we could do this with an iterator I expect. + // + // materialise all decoded values for the rows in the columns we are + // aggregating on. 
+ let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); + for (column_name, _) in aggregates { + if let Some(column) = self.column(&column_name) { + let decoded_values = column.values(&filtered_row_ids); + assert_eq!( + filtered_row_ids.cardinality() as usize, + decoded_values.len() + ); + aggregate_column_decoded_values.push((column_name, Some(decoded_values))); + } else { + aggregate_column_decoded_values.push((column_name, None)); + } + } + + // now we have all the matching rows for each grouping column and each aggregation + // column. Materialised values for grouping are in encoded form. + // + // Next we iterate all rows in all columns and create a hash entry with + // running aggregates. + + // First we will build a collection of iterators over the columns we + // are grouping on. For columns that have no matching rows from the + // filtering stage we will just emit None. + let mut group_itrs = group_column_encoded_values + .iter() + .map(|x| match x { + Some(values) => Some(values.iter()), + None => None, + }) + .collect::>(); + + // Next we will build a collection of iterators over the columns we + // are aggregating on. For columns that have no matching rows from the + // filtering stage we will just emit None. + let mut aggregate_itrs = aggregate_column_decoded_values + .into_iter() + .map(|(col_name, values)| match values { + Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), + None => (col_name.as_str(), None), + }) + .collect::>(); + + let mut hash_table: HashMap< + Vec>, + Vec<(&String, &Aggregate, Option)>, + > = HashMap::with_capacity(30000); + + let mut aggregate_row: Vec<(&str, Option)> = + std::iter::repeat_with(|| ("", None)) + .take(aggregate_itrs.len()) + .collect(); + + let mut processed_rows = 0; + while processed_rows < *total_rows { + let group_row: Vec> = group_itrs + .iter_mut() + .map(|x| match x { + Some(itr) => itr.next(), + None => None, + }) + .collect(); + + // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs + // .iter_mut() + // .map(|&mut (col_name, ref mut itr)| match itr { + // Some(itr) => (col_name, itr.next()), + // None => (col_name, None), + // }) + // .collect(); + + // re-use aggregate_row vector. + for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { + match itr { + Some(itr) => aggregate_row[i] = (col_name, itr.next()), + None => aggregate_row[i] = (col_name, None), + } + } + + // Lookup the group key in the hash map - if it's empty then insert + // a place-holder for each aggregate being executed. + let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { + // TODO COULD BE MAP/COLLECT + let mut agg_results: Vec<(&String, &Aggregate, Option)> = + Vec::with_capacity(aggregates.len()); + for (col_name, agg_type) in aggregates { + agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option + } + agg_results + }); + + // Update aggregates - we process each row value and for each one + // check which aggregates apply to it. + // + // TODO(edd): this is probably a bit of a perf suck. + for (col_name, row_value) in &aggregate_row { + for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in + group_key_entry.iter_mut() + { + if col_name != cum_col_name { + continue; + } + + // TODO(edd): remove unwrap - it should work because we are + // tracking iteration count in loop. 
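// [Editor's sketch] The essence of the hash-based grouping above, stripped of
// the column plumbing: build a key per row from the encoded group columns and
// keep running aggregates in a HashMap. `hash_group` and the (count, sum)
// tuple are illustrative simplifications of the crate's aggregate vectors.
use std::collections::HashMap;

fn hash_group(group_cols: &[Vec<i64>], values: &[f64]) -> HashMap<Vec<i64>, (u64, f64)> {
    let mut table: HashMap<Vec<i64>, (u64, f64)> = HashMap::new();
    for row in 0..values.len() {
        // the group key is this row's encoded value in each grouping column.
        let key: Vec<i64> = group_cols.iter().map(|col| col[row]).collect();
        let entry = table.entry(key).or_insert((0, 0.0));
        entry.0 += 1;           // running count
        entry.1 += values[row]; // running sum
    }
    table
}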
+ let row_value = row_value.as_ref().unwrap(); + + match cum_agg_value { + Some(agg) => match agg { + column::Aggregate::Count(cum_count) => { + *cum_count += 1; + } + column::Aggregate::Sum(cum_sum) => { + *cum_sum += row_value; + } + }, + None => { + *cum_agg_value = match agg_type { + Aggregate::Count => Some(column::Aggregate::Count(0)), + Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), + } + } + } + } + } + processed_rows += 1; + } + // println!("{:?}", hash_table.len()); BTreeMap::new() } @@ -476,46 +624,47 @@ impl<'a> Segments<'a> { group_columns: Vec, aggregates: Vec<(String, Aggregate)>, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { - // TODO(edd): support multi column groups - assert_eq!(group_columns.len(), 1); - let (min, max) = time_range; if max <= min { panic!("max <= min"); } + for segment in &self.segments { + segment.aggregate_by_groups(time_range, predicates, &group_columns, &aggregates); + } + let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = BTreeMap::new(); - for segment in &self.segments { - let segment_results = segment.group_agg_by_predicate_eq( - time_range, - predicates, - &group_columns, - &aggregates, - ); + // for segment in &self.segments { + // let segment_results = segment.group_agg_by_predicate_eq( + // time_range, + // predicates, + // &group_columns, + // &aggregates, + // ); - for (k, segment_aggs) in segment_results { - // assert_eq!(v.len(), aggregates.len()); - let cum_result = cum_results.get_mut(&k); - match cum_result { - Some(cum) => { - assert_eq!(cum.len(), segment_aggs.len()); - // In this case we need to aggregate the aggregates from - // each segment. - for i in 0..cum.len() { - // TODO(edd): this is more expensive than necessary - cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); - } - } - None => { - cum_results.insert(k, segment_aggs); - } - } - } - } + // for (k, segment_aggs) in segment_results { + // // assert_eq!(v.len(), aggregates.len()); + // let cum_result = cum_results.get_mut(&k); + // match cum_result { + // Some(cum) => { + // assert_eq!(cum.len(), segment_aggs.len()); + // // In this case we need to aggregate the aggregates from + // // each segment. + // for i in 0..cum.len() { + // // TODO(edd): this is more expensive than necessary + // cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); + // } + // } + // None => { + // cum_results.insert(k, segment_aggs); + // } + // } + // } + // } - // columns + // // columns cum_results } From b994831163dfea083c079e92041809b8cfbc4aab Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 10:47:35 +0100 Subject: [PATCH 20/73] refactor: decoded encoded ids --- delorean_mem_qe/src/column.rs | 27 +++++++++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 7 +++++++ 2 files changed, 34 insertions(+) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index ec62fd8104..c4bca59542 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -334,6 +334,26 @@ impl Column { } } + /// Given an encoded value for a row, materialise and return the decoded + /// version. + /// + /// This currently just supports decoding integer scalars back into dictionary + /// strings. + pub fn decode_value(&self, encoded_id: i64) -> std::string::String { + match self { + Column::String(c) => { + // FIX THIS UNWRAP AND HOPE THERE ARE NO NULL VALUES! 
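// [Editor's sketch] The `decode_value`/`decode_id` plumbing patch 20 adds here
// translates an encoded group key back into its dictionary strings. A hedged
// sketch of that inverse lookup over the `index -> entry` map, ignoring NULL
// entries just as the patch itself does for now; `decode_group_key` is an
// illustrative name, not the crate's API.
use std::collections::BTreeMap;

fn decode_group_key(index_entry: &BTreeMap<usize, String>, encoded_key: &[i64]) -> Vec<String> {
    encoded_key
        .iter()
        .map(|id| index_entry[&(*id as usize)].clone()) // panics on unknown ids
        .collect()
}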
+ c.decode_id(encoded_id).unwrap() + } + Column::Float(c) => { + unreachable!("this isn't supported right now"); + } + Column::Integer(c) => { + unreachable!("this isn't supported right now"); + } + } + } + /// materialise rows for each row_id pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { let row_ids_vec = row_ids.to_vec(); @@ -718,6 +738,13 @@ impl String { self.data.encoded_values(row_ids) } + /// Return the decoded value for an encoded ID. + /// + /// Panics if there is no decoded value for the provided id + pub fn decode_id(&self, encoded_id: i64) -> Option { + self.data.decode_id(encoded_id as usize) + } + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index d159a557f7..7285c1268a 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -442,6 +442,13 @@ impl DictionaryRLE { out } + /// Return the decoded value for an encoded ID. + /// + /// Panics if there is no decoded value for the provided id + pub fn decode_id(&self, encoded_id: usize) -> Option { + self.index_entry.get(&encoded_id).unwrap().clone() + } + /// Return the raw encoded values for the provided logical row ids. /// /// TODO(edd): return type is wrong but I'm making it fit From c1cbbf18f8244750a0164b8a07607d609ff8d10f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 11:38:27 +0100 Subject: [PATCH 21/73] fix: column sorting pre-check --- delorean_table/src/sorter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index 725a12f30e..fb0add52e7 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -67,7 +67,7 @@ pub fn sort(packers: &mut [Packers], sort_by: &[usize]) -> Result<(), Error> { if n > SORTED_CHECK_SIZE { let mut sorted = true; for i in 1..n { - if cmp(packers, 0, i, sort_by) != Ordering::Equal { + if cmp(packers, i - 1, i, sort_by) == Ordering::Greater { sorted = false; break; } From d70d5dde9de94f9ea51eedeb7eea486b00d3b1b4 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 15:54:11 +0100 Subject: [PATCH 22/73] feat: support pre-populating dictionary --- delorean_mem_qe/src/encoding.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 7285c1268a..cc231e087c 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -267,6 +267,31 @@ impl DictionaryRLE { } } + pub fn with_dictionary(dictionary: BTreeSet>) -> Self { + let mut _self = Self { + entry_index: BTreeMap::new(), + entry_row_ids: BTreeMap::new(), + index_entry: BTreeMap::new(), + map_size: 0, + run_lengths: Vec::new(), + run_length_size: 0, + total: 0, + }; + + for (next_idx, k) in dictionary.iter().enumerate() { + _self.entry_index.insert(k.to_owned(), next_idx); + _self.index_entry.insert(next_idx, k.to_owned()); + + _self + .entry_row_ids + .insert(k.to_owned(), croaring::Bitmap::create()); + + _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta + } + + _self + } + pub fn push(&mut self, v: &str) { self.push_additional(Some(v.to_owned()), 1); } @@ -380,11 +405,8 @@ impl DictionaryRLE { // unreachable!("for now"); // } - pub fn dictionary(&self) -> BTreeSet> { - self.entry_index - .keys() - .cloned() - .collect::>>() + pub fn dictionary(&self) -> BTreeMap, usize> { + self.entry_index.clone() } // 
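// [Editor's sketch] Patch 21 above fixes the "already sorted?" pre-check in the
// sorter: the old code compared every row against row 0 and required equality,
// so any column with more than one distinct value was declared unsorted; the
// fix compares neighbouring rows and only a descending pair disqualifies. The
// same predicate on a plain slice:
fn is_sorted_asc<T: Ord>(rows: &[T]) -> bool {
    // adjacent pairs may compare Less or Equal; only Greater breaks sortedness.
    (1..rows.len()).all(|i| rows[i - 1] <= rows[i])
}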
get the logical value at the provided index, or None if there is no value From 231f429a56402629bb2ab34af4a2cc97bed1da2d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 21:08:41 +0100 Subject: [PATCH 23/73] feat: sort group by measurement --- Cargo.lock | 1 + delorean_mem_qe/Cargo.toml | 5 + delorean_mem_qe/src/bin/main.rs | 35 +++- delorean_mem_qe/src/column.rs | 145 +++++++++++++--- delorean_mem_qe/src/encoding.rs | 57 +++--- delorean_mem_qe/src/segment.rs | 295 ++++++++++++++++++++++++++++++-- delorean_table/src/sorter.rs | 44 ++++- 7 files changed, 513 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index effb3598b4..4ccedecd78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -767,6 +767,7 @@ dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "chrono", "croaring", + "crossbeam", "delorean_table", "snafu", ] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 32531e888b..7f8ac255a3 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -6,11 +6,16 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[profile.release] +debug = true + [dependencies] delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } snafu = "0.6.8" croaring = "0.4.5" +crossbeam = "0.7.3" chrono = "0.4" [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e6bd236c26..aa3bbe4654 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -111,9 +111,9 @@ fn main() { let now = std::time::Instant::now(); let segments = store.segments(); let groups = segments.read_group_eq( - (0, 1590044410000000), + (0, 1890040790000000), &[], - vec!["env".to_string(), "status".to_string()], + vec!["env".to_string(), "role".to_string()], vec![ ("counter".to_string(), Aggregate::Sum), // ("counter".to_string(), Aggregate::Count), @@ -159,12 +159,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - // let mut i = 0; + let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - // if i < 364 { - // i += 1; - // continue; - // } + if i < 363 { + i += 1; + continue; + } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -204,7 +204,26 @@ fn convert_record_batch(rb: RecordBatch) -> Result { .downcast_ref::() .unwrap(); - let mut c = column::String::default(); + // IMPORTANT - build a set of values (dictionary) ahead of + // time so we can ensure we encoded the column in an ordinally + // correct manner. + // + // We can use a trick where encoded integers are ordered according + // to the decoded values, making sorting, comparison and grouping + // more efficient. 
+ // + let mut dictionary: std::collections::BTreeSet> = + std::collections::BTreeSet::new(); + for j in 1..arr.len() { + let next = if column.is_null(j) { + None + } else { + Some(arr.value(j).to_string()) + }; + dictionary.insert(next); + } + + let mut c = column::String::with_dictionary(dictionary); let mut prev = if !column.is_null(0) { Some(arr.value(0)) } else { diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index c4bca59542..08d95165b5 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -274,14 +274,53 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids. - pub fn values(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn values(&self, row_ids: &[usize]) -> Vector { match self { Column::String(c) => { if row_ids.is_empty() { return Vector::String(vec![]); } - let row_id_vec = row_ids.to_vec(); + Vector::String(c.values(row_ids)) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + let now = std::time::Instant::now(); + let v = c.values(row_ids); + println!("time getting decoded values for float {:?}", now.elapsed()); + + Vector::Float(v) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let now = std::time::Instant::now(); + let v = c.values(row_ids); + println!("time getting decoded values for int {:?}", now.elapsed()); + Vector::Integer(v) + } + } + } + + /// Materialise all of the decoded values matching the provided logical + /// row ids within the bitmap + pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::String(vec![]); + } + + let row_id_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); Vector::String(c.values(&row_id_vec)) } Column::Float(c) => { @@ -289,7 +328,11 @@ impl Column { return Vector::Float(vec![]); } - let row_id_vec = row_ids.to_vec(); + let row_id_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); Vector::Float(c.values(&row_id_vec)) } Column::Integer(c) => { @@ -297,7 +340,11 @@ impl Column { return Vector::Integer(vec![]); } - let row_id_vec = row_ids.to_vec(); + let row_id_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); Vector::Integer(c.values(&row_id_vec)) } } @@ -305,31 +352,70 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. 
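// [Editor's sketch] The ordinal-encoding trick described above: pre-building
// the dictionary from a BTreeSet assigns ids in lexicographic order of the
// decoded values, so comparing, sorting, and grouping on the integer ids agrees
// with the string order. (Note the population loop above starts at index 1, so
// row 0's value only joins the pre-built dictionary if it re-appears later.)
// A minimal stand-alone sketch; `ordinal_dictionary` is an illustrative name.
use std::collections::{BTreeMap, BTreeSet};

fn ordinal_dictionary(values: &[&str]) -> BTreeMap<String, usize> {
    let distinct: BTreeSet<&str> = values.iter().copied().collect();
    distinct
        .into_iter()
        .enumerate() // BTreeSet iterates in sorted order, so ids are ordinal
        .map(|(id, v)| (v.to_string(), id))
        .collect()
}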
- pub fn encoded_values(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + let now = std::time::Instant::now(); + let row_ids_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + println!("time unpacking bitmap {:?}", now.elapsed()); + match self { Column::String(c) => { if row_ids.is_empty() { return Vector::Integer(vec![]); } - let row_id_vec = row_ids.to_vec(); - Vector::Integer(c.encoded_values(&row_id_vec)) + let now = std::time::Instant::now(); + let v = c.encoded_values(&row_ids_vec); + println!("time getting encoded values {:?}", now.elapsed()); + Vector::Integer(v) } Column::Float(c) => { if row_ids.is_empty() { return Vector::Float(vec![]); } - let row_id_vec = row_ids.to_vec(); - Vector::Float(c.encoded_values(&row_id_vec)) + Vector::Float(c.encoded_values(&row_ids_vec)) } Column::Integer(c) => { if row_ids.is_empty() { return Vector::Integer(vec![]); } - let row_id_vec = row_ids.to_vec(); - Vector::Integer(c.encoded_values(&row_id_vec)) + Vector::Integer(c.encoded_values(&row_ids_vec)) + } + } + } + + /// Materialise all of the encoded values matching the provided logical + /// row ids. + pub fn encoded_values(&self, row_ids: &[usize]) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let now = std::time::Instant::now(); + let v = c.encoded_values(&row_ids); + println!("time getting encoded values {:?}", now.elapsed()); + Vector::Integer(v) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + Vector::Float(c.encoded_values(&row_ids)) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + Vector::Integer(c.encoded_values(&row_ids)) } } } @@ -356,7 +442,14 @@ impl Column { /// materialise rows for each row_id pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { - let row_ids_vec = row_ids.to_vec(); + let now = std::time::Instant::now(); + let row_ids_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + println!("time unpacking bitmap {:?}", now.elapsed()); + assert!( row_ids_vec.len() == 1 || row_ids_vec[row_ids_vec.len() - 1] > row_ids_vec[0], "got last row_id={:?} and first row_id={:?}", @@ -555,9 +648,7 @@ impl Column { } } - pub fn group_by_ids( - &self, - ) -> &std::collections::BTreeMap, croaring::Bitmap> { + pub fn group_by_ids(&self) -> &std::collections::BTreeMap { match self { Column::String(c) => c.data.group_row_ids(), Column::Float(_) => unimplemented!("not implemented"), @@ -708,6 +799,14 @@ pub struct String { } impl String { + pub fn with_dictionary( + dictionary: std::collections::BTreeSet>, + ) -> Self { + let mut c = Self::default(); + c.data = encoding::DictionaryRLE::with_dictionary(dictionary); + c + } + pub fn add(&mut self, s: &str) { self.meta.add(Some(s.to_string())); self.data.push(s); @@ -730,11 +829,11 @@ impl String { self.data.value(row_id) } - pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } @@ -755,9 +854,7 @@ impl String { } // TODO(edd) shouldn't let roaring stuff leak out... 
- pub fn group_row_ids( - &self, - ) -> &std::collections::BTreeMap, croaring::Bitmap> { + pub fn group_row_ids(&self) -> &std::collections::BTreeMap { self.data.group_row_ids() } } @@ -783,11 +880,11 @@ impl Float { self.data.value(row_id) } - pub fn values(&self, row_ids: &[u32]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } @@ -844,11 +941,11 @@ impl Integer { self.data.value(row_id) } - pub fn values(&self, row_ids: &[u32]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index cc231e087c..de7090115c 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -44,17 +44,26 @@ where } /// Return the decoded values for the provided logical row ids. - pub fn values(&self, row_ids: &[u32]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); - for row_id in row_ids { - out.push(self.values[*row_id as usize]); + for chunks in row_ids.chunks_exact(4) { + out.push(self.values[chunks[3]]); + out.push(self.values[chunks[2]]); + out.push(self.values[chunks[1]]); + out.push(self.values[chunks[0]]); + // out.push(self.values[row_id]); + } + + let rem = row_ids.len() % 4; + for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { + out.push(self.values[i]); } out } /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.values(row_ids) } @@ -238,12 +247,12 @@ pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. entry_index: BTreeMap, usize>, - // Experiment - store rows that each entry has a value for - entry_row_ids: BTreeMap, croaring::Bitmap>, - // stores the mapping between an index and its entry. 
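// [Editor's note] In the unrolled `values` hunk above, the four pushes index
// chunks[3] down to chunks[0], which appears to emit every block of four values
// in reverse row order; a straight-line gather keeps row order. A hedged sketch
// with i64 standing in for the generic T:
fn gather(values: &[i64], row_ids: &[usize]) -> Vec<i64> {
    let mut out = Vec::with_capacity(row_ids.len());
    for chunk in row_ids.chunks_exact(4) {
        // manual 4-wide unroll, preserving ascending order
        out.push(values[chunk[0]]);
        out.push(values[chunk[1]]);
        out.push(values[chunk[2]]);
        out.push(values[chunk[3]]);
    }
    let rem = row_ids.len() % 4;
    for &i in &row_ids[row_ids.len() - rem..] {
        out.push(values[i]);
    }
    out
}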
index_entry: BTreeMap>, + // Experiment - store rows that each entry has a value for + index_row_ids: BTreeMap, + map_size: usize, // TODO(edd) this isn't perfect at all // stores tuples where each pair refers to a dictionary entry and the number @@ -258,7 +267,7 @@ impl DictionaryRLE { pub fn new() -> Self { Self { entry_index: BTreeMap::new(), - entry_row_ids: BTreeMap::new(), + index_row_ids: BTreeMap::new(), index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), @@ -270,7 +279,7 @@ impl DictionaryRLE { pub fn with_dictionary(dictionary: BTreeSet>) -> Self { let mut _self = Self { entry_index: BTreeMap::new(), - entry_row_ids: BTreeMap::new(), + index_row_ids: BTreeMap::new(), index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), @@ -283,8 +292,8 @@ impl DictionaryRLE { _self.index_entry.insert(next_idx, k.to_owned()); _self - .entry_row_ids - .insert(k.to_owned(), croaring::Bitmap::create()); + .index_row_ids + .insert(next_idx as u32, croaring::Bitmap::create()); _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta } @@ -313,8 +322,8 @@ impl DictionaryRLE { self.run_lengths.push((*idx, additional)); self.run_length_size += std::mem::size_of::<(usize, u64)>(); } - self.entry_row_ids - .get_mut(&v) + self.index_row_ids + .get_mut(&(*idx as u32)) .unwrap() .add_range(self.total..self.total + additional); } @@ -325,8 +334,8 @@ impl DictionaryRLE { let idx = self.entry_index.len(); self.entry_index.insert(v.clone(), idx); - self.entry_row_ids - .insert(v.clone(), croaring::Bitmap::create()); + self.index_row_ids + .insert(idx as u32, croaring::Bitmap::create()); if let Some(value) = &v { self.map_size += value.len(); } @@ -334,8 +343,8 @@ impl DictionaryRLE { self.map_size += 8 + std::mem::size_of::(); // TODO(edd): clean this option size up self.run_lengths.push((idx, additional)); - self.entry_row_ids - .get_mut(&v) + self.index_row_ids + .get_mut(&(idx as u32)) .unwrap() .add_range(self.total..self.total + additional); self.run_length_size += std::mem::size_of::<(usize, u64)>(); @@ -380,8 +389,8 @@ impl DictionaryRLE { } // get the set of row ids for each distinct value - pub fn group_row_ids(&self) -> &BTreeMap, croaring::Bitmap> { - &self.entry_row_ids + pub fn group_row_ids(&self) -> &BTreeMap { + &self.index_row_ids } // row_ids returns an iterator over the set of row ids matching the provided @@ -431,7 +440,7 @@ impl DictionaryRLE { // materialises a vector of references to the decoded values in the // each provided row_id. 
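// [Editor's sketch] The refactor above keys the per-value row-id bitmaps by
// dictionary id (`index_row_ids`) instead of by the value itself. The
// maintenance step in isolation: whenever a run of `additional` rows is
// appended for an entry, that entry's bitmap gains the row range, which later
// makes `group_row_ids` a ready-made grouping index. `record_run` is an
// illustrative stand-in for the bookkeeping inside `push_additional`.
use std::collections::BTreeMap;

fn record_run(
    index_row_ids: &mut BTreeMap<u32, croaring::Bitmap>,
    entry_id: u32,
    start_row: u64,
    additional: u64,
) {
    index_row_ids
        .entry(entry_id)
        .or_insert_with(croaring::Bitmap::create)
        .add_range(start_row..start_row + additional);
}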
- pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { let mut out: Vec<&Option> = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -475,7 +484,7 @@ impl DictionaryRLE { /// /// TODO(edd): return type is wrong but I'm making it fit /// - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { let mut out: Vec = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -706,21 +715,21 @@ mod test { assert_eq!(drle.value(8).unwrap(), "zoo"); let row_ids = drle - .entry_row_ids + .index_row_ids .get(&Some("hello".to_string())) .unwrap() .to_vec(); assert_eq!(row_ids, vec![0, 1, 3, 4, 5]); let row_ids = drle - .entry_row_ids + .index_row_ids .get(&Some("world".to_string())) .unwrap() .to_vec(); assert_eq!(row_ids, vec![2]); let row_ids = drle - .entry_row_ids + .index_row_ids .get(&Some("zoo".to_string())) .unwrap() .to_vec(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f4a9d2c1fa..cf04671683 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -138,13 +138,14 @@ impl Segment { &self, name: &str, ) -> Option<&std::collections::BTreeMap, croaring::Bitmap>> { - if let Some(c) = self.column(name) { - return Some(c.group_by_ids()); - } - None + unimplemented!("just need to convert encoded keys into decoded strings"); + // if let Some(c) = self.column(name) { + // return Some(c.group_by_ids()); + // } + // None } - pub fn aggregate_by_groups( + pub fn aggregate_by_group_with_hash( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], @@ -164,6 +165,13 @@ impl Segment { return BTreeMap::new(); } let total_rows = &filtered_row_ids.cardinality(); + + // materialise the row ids we need to filter on as a vec. 
+ let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); // println!("TOTAL FILTERED ROWS {:?}", total_rows); // materialise all encoded values for the matching rows in the columns @@ -172,7 +180,9 @@ impl Segment { for group_column in group_columns { if let Some(column) = self.column(&group_column) { let encoded_values: Vec; - if let column::Vector::Integer(vector) = column.encoded_values(&filtered_row_ids) { + if let column::Vector::Integer(vector) = + column.encoded_values(&filtered_row_ids_vec) + { encoded_values = vector; } else { unimplemented!("currently you can only group on encoded string columns"); @@ -196,7 +206,7 @@ impl Segment { let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { if let Some(column) = self.column(&column_name) { - let decoded_values = column.values(&filtered_row_ids); + let decoded_values = column.values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, decoded_values.len() @@ -323,6 +333,235 @@ impl Segment { BTreeMap::new() } + pub fn aggregate_by_group_with_sort( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, Aggregate)], + ) -> BTreeMap, Vec<(String, Option)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + // println!("TOTAL FILTERED ROWS {:?}", total_rows); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values: delorean_table::Packer; + if let column::Vector::Integer(vector) = + column.encoded_values(&filtered_row_ids_vec) + { + encoded_values = delorean_table::Packer::from(vector); + } else { + unimplemented!("currently you can only group on encoded string columns"); + } + + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.num_rows() + ); + group_column_encoded_values.push(Some(encoded_values)); + } else { + group_column_encoded_values.push(None); + } + } + // println!("grouped columns {:?}", group_column_encoded_values); + + // TODO(edd): we could do this with an iterator I expect. + // + // materialise all decoded values for the rows in the columns we are + // aggregating on. 
+ let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); + for (column_name, _) in aggregates { + if let Some(column) = self.column(&column_name) { + let decoded_values = match column.values(&filtered_row_ids_vec) { + column::Vector::String(_) => unreachable!("not supported"), + column::Vector::Float(v) => delorean_table::Packers::from(v), + column::Vector::Integer(v) => delorean_table::Packers::from(v), + }; + + assert_eq!( + filtered_row_ids.cardinality() as usize, + decoded_values.num_rows() + ); + aggregate_column_decoded_values.push((column_name, Some(decoded_values))); + } else { + aggregate_column_decoded_values.push((column_name, None)); + } + } + + let mut all_columns = Vec::with_capacity( + group_column_encoded_values.len() + aggregate_column_decoded_values.len(), + ); + + for gc in group_column_encoded_values { + if let Some(p) = gc { + all_columns.push(delorean_table::Packers::Integer(p)); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + for ac in aggregate_column_decoded_values { + if let (_, Some(p)) = ac { + all_columns.push(p); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + // now sort on the first grouping columns. Right now the order doesn't matter... + let now = std::time::Instant::now(); + delorean_table::sorter::sort(&mut all_columns, &[0, 1]).unwrap(); + println!("time checking sort {:?}", now.elapsed()); + + let mut group_itrs = all_columns + .iter() + .take(group_columns.len()) // only use grouping columns + .map(|x| match x { + delorean_table::Packers::Integer(p) => p.iter(), + _ => { + panic!("not here {:?} ", x); + } + }) + .collect::>(); + + + + // now we have all the matching rows for each grouping column and each aggregation + // column. Materialised values for grouping are in encoded form. + // + // Next we sort all columns according to the group key. + // let mut all_columns = vec![]; + // for + // delorean_table::packers::Packers + // // First we will build a collection of iterators over the columns we + // // are grouping on. For columns that have no matching rows from the + // // filtering stage we will just emit None. + + // // Next we will build a collection of iterators over the columns we + // // are aggregating on. For columns that have no matching rows from the + // // filtering stage we will just emit None. 
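// [Editor's sketch] What the sort-based path above is building toward: once all
// columns are sorted by the (encoded) group key, aggregates fall out of one
// sequential pass that flushes a result whenever the key changes. Plain vectors
// stand in for the crate's Packers; single group column, single Sum aggregate.
fn sorted_group_sum(sorted_keys: &[i64], values: &[f64]) -> Vec<(i64, f64)> {
    let mut out = Vec::new();
    let mut iter = sorted_keys.iter().zip(values);
    let (mut current, mut sum) = match iter.next() {
        Some((&k, &v)) => (k, v),
        None => return out, // no rows, no groups
    };
    for (&k, &v) in iter {
        if k != current {
            out.push((current, sum)); // key changed: emit the finished group
            current = k;
            sum = 0.0;
        }
        sum += v;
    }
    out.push((current, sum)); // emit the final group
    out
}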
+ // let mut aggregate_itrs = aggregate_column_decoded_values + // .into_iter() + // .map(|(col_name, values)| match values { + // Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), + // None => (col_name.as_str(), None), + // }) + // .collect::>(); + + // let mut hash_table: HashMap< + // Vec>, + // Vec<(&String, &Aggregate, Option)>, + // > = HashMap::with_capacity(30000); + + let mut aggregate_row: Vec<(&str, Option)> = agg + + let mut processed_rows = 0; + let last_group_row: Vec> = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect(); + + while processed_rows < *total_rows { + // let group_row: Vec> = group_itrs + // .iter_mut() + // .map(|x| match x { + // Some(itr) => itr.next().unwrap(), + // None => None, + // }) + // .collect(); + + // check if group key has changed + for (&curr_v, itr) in last_group_row.iter().zip(&mut group_itrs) { + if curr_v != itr.next().unwrap() { + // group key changed + } + } + + // group key is the same - update aggregates + + // // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs + // // .iter_mut() + // // .map(|&mut (col_name, ref mut itr)| match itr { + // // Some(itr) => (col_name, itr.next()), + // // None => (col_name, None), + // // }) + // // .collect(); + + // // re-use aggregate_row vector. + // for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { + // match itr { + // Some(itr) => aggregate_row[i] = (col_name, itr.next()), + // None => aggregate_row[i] = (col_name, None), + // } + // } + + // // Lookup the group key in the hash map - if it's empty then insert + // // a place-holder for each aggregate being executed. + // let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { + // // TODO COULD BE MAP/COLLECT + // let mut agg_results: Vec<(&String, &Aggregate, Option)> = + // Vec::with_capacity(aggregates.len()); + // for (col_name, agg_type) in aggregates { + // agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option + // } + // agg_results + // }); + + // // Update aggregates - we process each row value and for each one + // // check which aggregates apply to it. + // // + // // TODO(edd): this is probably a bit of a perf suck. + // for (col_name, row_value) in &aggregate_row { + // for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in + // group_key_entry.iter_mut() + // { + // if col_name != cum_col_name { + // continue; + // } + + // // TODO(edd): remove unwrap - it should work because we are + // // tracking iteration count in loop. 
+ // let row_value = row_value.as_ref().unwrap(); + + // match cum_agg_value { + // Some(agg) => match agg { + // column::Aggregate::Count(cum_count) => { + // *cum_count += 1; + // } + // column::Aggregate::Sum(cum_sum) => { + // *cum_sum += row_value; + // } + // }, + // None => { + // *cum_agg_value = match agg_type { + // Aggregate::Count => Some(column::Aggregate::Count(0)), + // Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), + // } + // } + // } + // } + // } + processed_rows += 1; + } + // println!("{:?}", hash_table.len()); + BTreeMap::new() + } pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -442,11 +681,11 @@ impl Segment { bm } - pub fn group_agg_by_predicate_eq( + pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &Vec, + group_column: &String, aggregates: &Vec<(String, Aggregate)>, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let mut grouped_results = BTreeMap::new(); @@ -459,7 +698,7 @@ impl Segment { } } - if let Some(grouped_row_ids) = self.group_by_column_ids(&group_columns[0]) { + if let Some(grouped_row_ids) = self.group_by_column_ids(group_column) { for (group_key_value, row_ids) in grouped_row_ids.iter() { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { @@ -630,9 +869,43 @@ impl<'a> Segments<'a> { } for segment in &self.segments { - segment.aggregate_by_groups(time_range, predicates, &group_columns, &aggregates); + // // segment.aggregate_by_group_with_hash( + // // time_range, + // // predicates, + // // &group_columns, + // // &aggregates, + // // ); + + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + ); } + // let group_columns_arc = std::sync::Arc::new(group_columns); + // let aggregates_arc = std::sync::Arc::new(aggregates); + + // for chunked_segments in self.segments.chunks(12) { + // crossbeam::scope(|scope| { + // for segment in chunked_segments { + // let group_columns = group_columns_arc.clone(); + // let aggregates = aggregates_arc.clone(); + + // scope.spawn(move |_| { + // segment.aggregate_by_group_with_sort( + // time_range, + // predicates, + // &group_columns, + // &aggregates, + // ); + // }); + // } + // }) + // .unwrap(); + // } + let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = BTreeMap::new(); diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index fb0add52e7..a78be80bc0 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -76,9 +76,14 @@ pub fn sort(packers: &mut [Packers], sort_by: &[usize]) -> Result<(), Error> { if sorted { return Ok(()); } + // if packers_sorted_asc(packers, n, sort_by) { + // return Ok(()); + // } + // return Ok(()); } - + let now = std::time::Instant::now(); quicksort_by(packers, 0..n - 1, sort_by); + println!("sorted in {:?}", now.elapsed()); Ok(()) } @@ -152,9 +157,9 @@ fn cmp(packers: &[Packers], a: usize, b: usize, sort_by: &[usize]) -> Ordering { Packers::Integer(p) => { let cmp = p.get(a).cmp(&p.get(b)); if cmp != Ordering::Equal { - // if cmp equal then try next packer column. return cmp; } + // if cmp equal then try next packer column. 
} _ => continue, // don't compare on non-string / timestamp cols } @@ -162,6 +167,41 @@ fn cmp(packers: &[Packers], a: usize, b: usize, sort_by: &[usize]) -> Ordering { Ordering::Equal } +fn packers_sorted_asc(packers: &[Packers], len: usize, sort_by: &[usize]) -> bool { + 'row_wise: for i in 1..len { + for &idx in sort_by { + match &packers[idx] { + Packers::String(p) => { + let vec = p.values(); + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + Packers::Integer(p) => { + let vec = p.values(); + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + _ => continue, // don't compare on non-string / timestamp cols + } + } + } + true +} + // Swap the same pair of elements in each packer column fn swap(packers: &mut [Packers], a: usize, b: usize) { for p in packers { From a5a8667a424e428f3a9481c3e5a306b8c8b54b85 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 14 Aug 2020 11:25:48 +0100 Subject: [PATCH 24/73] feat: group by sorting --- Cargo.lock | 6 +- delorean_mem_qe/Cargo.toml | 5 +- delorean_mem_qe/src/bin/main.rs | 48 +++--- delorean_mem_qe/src/column.rs | 100 +++++++++-- delorean_mem_qe/src/encoding.rs | 1 - delorean_mem_qe/src/lib.rs | 1 + delorean_mem_qe/src/segment.rs | 290 +++++++++++++------------------- delorean_mem_qe/src/sorter.rs | 197 ++++++++++++++++++++++ 8 files changed, 436 insertions(+), 212 deletions(-) create mode 100644 delorean_mem_qe/src/sorter.rs diff --git a/Cargo.lock b/Cargo.lock index 4ccedecd78..fad93f1f3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -769,6 +769,8 @@ dependencies = [ "croaring", "crossbeam", "delorean_table", + "env_logger", + "log", "snafu", ] @@ -1596,9 +1598,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.8" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" dependencies = [ "cfg-if", ] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 7f8ac255a3..89b9ff1e9a 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -7,9 +7,6 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[profile.release] -debug = true - [dependencies] delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } @@ -17,6 +14,8 @@ snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" chrono = "0.4" +log = "0.4.11" +env_logger = "0.7.1" [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index aa3bbe4654..fa97b6e702 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -24,6 +24,8 @@ pub enum Error { } fn main() { + env_logger::init(); + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); let reader = ipc::reader::StreamReader::try_new(r).unwrap(); @@ -36,7 +38,7 @@ fn main() { store.size(), ); - // time_group_by_agg(&store); + time_group_by_agg(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -107,20 
+109,20 @@ fn main() { // println!("ROWS ({}) {:?}", v, v.len()); // } - loop { - let now = std::time::Instant::now(); - let segments = store.segments(); - let groups = segments.read_group_eq( - (0, 1890040790000000), - &[], - vec!["env".to_string(), "role".to_string()], - vec![ - ("counter".to_string(), Aggregate::Sum), - // ("counter".to_string(), Aggregate::Count), - ], - ); - println!("{:?} {:?}", groups, now.elapsed()); - } + // loop { + // let now = std::time::Instant::now(); + // let segments = store.segments(); + // let groups = segments.read_group_eq( + // (0, 1590044410000000), + // &[], + // vec!["env".to_string(), "role".to_string()], + // vec![ + // ("counter".to_string(), Aggregate::Sum), + // // ("counter".to_string(), Aggregate::Count), + // ], + // ); + // println!("{:?} {:?}", groups, now.elapsed()); + // } // loop { // let mut total_count = 0.0; @@ -159,12 +161,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - let mut i = 0; + // let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - if i < 363 { - i += 1; - continue; - } + // if i < 363 { + // i += 1; + // continue; + // } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -402,7 +404,7 @@ fn time_row_by_preds(store: &Store) { } fn time_group_by_agg(store: &Store) { - let repeat = 100; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -411,8 +413,8 @@ fn time_group_by_agg(store: &Store) { let groups = segments.read_group_eq( (0, 1590044410000000), - &[("method", Some(&column::Scalar::String("GET")))], - vec!["env".to_string(), "status".to_string()], + &[], + vec!["status".to_string(), "method".to_string()], vec![ ("counter".to_string(), Aggregate::Sum), // ("counter".to_string(), Aggregate::Count), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 08d95165b5..144c2bd6ec 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -9,6 +9,44 @@ pub enum Scalar<'a> { Integer(i64), } +impl<'a> Scalar<'a> { + pub fn reset(&mut self) { + match self { + Scalar::String(s) => { + panic!("not supported"); + } + Scalar::Float(v) => { + *v = 0.0; + } + Scalar::Integer(v) => { + *v = 0; + } + } + } + + pub fn add(&mut self, other: Scalar<'a>) { + match self { + Self::Float(v) => { + if let Self::Float(other) = other { + *v += other; + } else { + panic!("invalid"); + }; + } + Self::Integer(v) => { + if let Self::Integer(other) = other { + *v += other; + } else { + panic!("invalid"); + }; + } + Self::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { type Output = Scalar<'a>; @@ -65,6 +103,30 @@ pub enum Aggregate<'a> { Sum(Scalar<'a>), } +impl<'a> Aggregate<'a> { + pub fn update_with(&mut self, other: Scalar<'a>) { + match self { + Self::Count(v) => { + *v = *v + 1; + } + Self::Sum(v) => { + v.add(other); + } + } + } +} + +impl<'a> std::ops::Add> for Aggregate<'a> { + type Output = Aggregate<'a>; + + fn add(self, _rhs: Scalar<'a>) -> Self::Output { + match self { + Self::Count(c) => Self::Count(c + 1), + Self::Sum(s) => Self::Sum(s + &_rhs), + } + } +} + impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { type Output = Aggregate<'a>; @@ -170,17 +232,31 @@ impl<'a> Vector<'a> { } } } + + pub fn swap(&mut self, a: usize, b: usize) { + match self { + Self::String(v) => { + v.swap(a, b); + } + 
Self::Float(v) => { + v.swap(a, b); + } + Self::Integer(v) => { + v.swap(a, b); + } + } + } } /// VectorIterator allows a `Vector` to be iterated. Until vectors are drained /// Scalar values are emitted. pub struct VectorIterator<'a> { - v: Vector<'a>, + v: &'a Vector<'a>, next_i: usize, } impl<'a> VectorIterator<'a> { - pub fn new(v: Vector<'a>) -> Self { + pub fn new(v: &'a Vector<'a>) -> Self { Self { v, next_i: 0 } } } @@ -290,7 +366,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.values(row_ids); - println!("time getting decoded values for float {:?}", now.elapsed()); + log::debug!("time getting decoded values for float {:?}", now.elapsed()); Vector::Float(v) } @@ -301,7 +377,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.values(row_ids); - println!("time getting decoded values for int {:?}", now.elapsed()); + log::debug!("time getting decoded values for int {:?}", now.elapsed()); Vector::Integer(v) } } @@ -359,7 +435,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - println!("time unpacking bitmap {:?}", now.elapsed()); + log::debug!("time unpacking bitmap {:?}", now.elapsed()); match self { Column::String(c) => { @@ -369,7 +445,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids_vec); - println!("time getting encoded values {:?}", now.elapsed()); + log::debug!("time getting encoded values {:?}", now.elapsed()); Vector::Integer(v) } Column::Float(c) => { @@ -400,7 +476,9 @@ impl Column { let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids); - println!("time getting encoded values {:?}", now.elapsed()); + log::debug!("time getting encoded values {:?}", now.elapsed()); + + log::debug!("dictionary {:?}", c.data.dictionary()); Vector::Integer(v) } Column::Float(c) => { @@ -448,7 +526,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - println!("time unpacking bitmap {:?}", now.elapsed()); + log::debug!("time unpacking bitmap {:?}", now.elapsed()); assert!( row_ids_vec.len() == 1 || row_ids_vec[row_ids_vec.len() - 1] > row_ids_vec[0], @@ -1060,9 +1138,11 @@ pub mod metadata { pub fn maybe_contains_value(&self, v: f64) -> bool { let res = self.range.0 <= v && v <= self.range.1; - println!( + log::debug!( "column with ({:?}) maybe contain {:?} -- {:?}", - self.range, v, res + self.range, + v, + res ); res } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index de7090115c..4cec3b25f6 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -297,7 +297,6 @@ impl DictionaryRLE { _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta } - _self } diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 26b78d9963..150e5ad588 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,6 +1,7 @@ pub mod column; pub mod encoding; pub mod segment; +pub mod sorter; use segment::{Segment, Segments}; diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index cf04671683..8e4189982d 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -238,7 +238,7 @@ impl Segment { // are aggregating on. For columns that have no matching rows from the // filtering stage we will just emit None. 
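// --------------------------------------------------------------------------
// A minimal sketch (for illustration; not part of this patch) of the
// hash-based variant implemented here: group keys are rows of encoded column
// values, and each key maps to a running (count, sum) accumulator. Types are
// simplified to i64 keys and f64 values.
use std::collections::HashMap;

fn hash_group(keys: &[Vec<i64>], values: &[f64]) -> HashMap<Vec<i64>, (u64, f64)> {
    let mut table: HashMap<Vec<i64>, (u64, f64)> = HashMap::new();
    for (key, &v) in keys.iter().zip(values) {
        // `entry` inserts a zeroed accumulator the first time a key is seen.
        let agg = table.entry(key.clone()).or_insert((0, 0.0));
        agg.0 += 1;
        agg.1 += v;
    }
    table
}
// --------------------------------------------------------------------------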
let mut aggregate_itrs = aggregate_column_decoded_values - .into_iter() + .iter() .map(|(col_name, values)| match values { Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), None => (col_name.as_str(), None), @@ -329,7 +329,7 @@ impl Segment { } processed_rows += 1; } - // println!("{:?}", hash_table.len()); + log::debug!("{:?}", hash_table); BTreeMap::new() } @@ -339,7 +339,7 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, Aggregate)], - ) -> BTreeMap, Vec<(String, Option)>> { + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -361,24 +361,17 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values: delorean_table::Packer; - if let column::Vector::Integer(vector) = - column.encoded_values(&filtered_row_ids_vec) - { - encoded_values = delorean_table::Packer::from(vector); - } else { - unimplemented!("currently you can only group on encoded string columns"); - } - + let encoded_values = column.encoded_values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, - encoded_values.num_rows() + encoded_values.len() ); group_column_encoded_values.push(Some(encoded_values)); } else { group_column_encoded_values.push(None); } } + let group_col_sort_order = &(0..group_columns.len()).collect::>(); // println!("grouped columns {:?}", group_column_encoded_values); // TODO(edd): we could do this with an iterator I expect. @@ -388,15 +381,10 @@ impl Segment { let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { if let Some(column) = self.column(&column_name) { - let decoded_values = match column.values(&filtered_row_ids_vec) { - column::Vector::String(_) => unreachable!("not supported"), - column::Vector::Float(v) => delorean_table::Packers::from(v), - column::Vector::Integer(v) => delorean_table::Packers::from(v), - }; - + let decoded_values = column.values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, - decoded_values.num_rows() + decoded_values.len() ); aggregate_column_decoded_values.push((column_name, Some(decoded_values))); } else { @@ -410,7 +398,7 @@ impl Segment { for gc in group_column_encoded_values { if let Some(p) = gc { - all_columns.push(delorean_table::Packers::Integer(p)); + all_columns.push(p); } else { panic!("need to handle no results for filtering/grouping..."); } @@ -426,142 +414,106 @@ impl Segment { // now sort on the first grouping columns. Right now the order doesn't matter... 
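// Illustration (added for clarity; the values are invented): with group
// columns [status, method] encoded as integers, sorting every column by
// indexes [0, 1] clusters identical group keys together:
//   status: [2, 1, 2, 1]  ->  [1, 1, 2, 2]
//   method: [0, 0, 1, 0]  ->  [0, 0, 0, 1]
// Aggregate columns are co-sorted by the same row permutation, so a single
// forward scan can accumulate per-group aggregates.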
let now = std::time::Instant::now(); - delorean_table::sorter::sort(&mut all_columns, &[0, 1]).unwrap(); - println!("time checking sort {:?}", now.elapsed()); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + log::debug!("time checking sort {:?}", now.elapsed()); let mut group_itrs = all_columns .iter() .take(group_columns.len()) // only use grouping columns - .map(|x| match x { - delorean_table::Packers::Integer(p) => p.iter(), - _ => { - panic!("not here {:?} ", x); + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); } }) .collect::>(); + let mut aggregate_itrs = all_columns + .iter() + .skip(group_columns.len()) // only use grouping columns + .map(|v| column::VectorIterator::new(v)) + .collect::>(); + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect::>(); - // now we have all the matching rows for each grouping column and each aggregation - // column. Materialised values for grouping are in encoded form. - // - // Next we sort all columns according to the group key. - // let mut all_columns = vec![]; - // for - // delorean_table::packers::Packers - // // First we will build a collection of iterators over the columns we - // // are grouping on. For columns that have no matching rows from the - // // filtering stage we will just emit None. + let mut curr_group_row = last_group_row.clone(); - // // Next we will build a collection of iterators over the columns we - // // are aggregating on. For columns that have no matching rows from the - // // filtering stage we will just emit None. - // let mut aggregate_itrs = aggregate_column_decoded_values - // .into_iter() - // .map(|(col_name, values)| match values { - // Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), - // None => (col_name.as_str(), None), - // }) - // .collect::>(); - - // let mut hash_table: HashMap< - // Vec>, - // Vec<(&String, &Aggregate, Option)>, - // > = HashMap::with_capacity(30000); - - let mut aggregate_row: Vec<(&str, Option)> = agg - - let mut processed_rows = 0; - let last_group_row: Vec> = group_itrs + // this tracks the last row for each column we are aggregating. + let last_agg_row: Vec = aggregate_itrs .iter_mut() .map(|itr| itr.next().unwrap()) .collect(); - while processed_rows < *total_rows { - // let group_row: Vec> = group_itrs - // .iter_mut() - // .map(|x| match x { - // Some(itr) => itr.next().unwrap(), - // None => None, - // }) - // .collect(); + // this keeps the current cumulative aggregates for the columns we + // are aggregating. + let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates + .iter() + .zip(last_agg_row.iter()) + .map(|((col_name, agg_type), curr_agg)| { + let agg = match agg_type { + Aggregate::Count => column::Aggregate::Count(1), + Aggregate::Sum => column::Aggregate::Sum(curr_agg.clone()), + }; + (col_name.clone(), agg) + }) + .collect(); - // check if group key has changed - for (&curr_v, itr) in last_group_row.iter().zip(&mut group_itrs) { - if curr_v != itr.next().unwrap() { - // group key changed + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + while processed_rows < *total_rows { + // update next group key. 
+ let mut group_key_changed = false; + for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { + let next_v = itr.next().unwrap(); + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. + if group_key_changed { + let key = last_group_row.clone(); + results.insert(key, cum_aggregates.clone()); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset cumulative aggregates + for (_, agg) in cum_aggregates.iter_mut() { + match agg { + column::Aggregate::Count(c) => { + *c = 0; + } + column::Aggregate::Sum(s) => s.reset(), + } } } - // group key is the same - update aggregates + // update aggregates + for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { + let (_, curr_agg) = bind.0; + let next_value = bind.1.next().unwrap(); + curr_agg.update_with(next_value); + } - // // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs - // // .iter_mut() - // // .map(|&mut (col_name, ref mut itr)| match itr { - // // Some(itr) => (col_name, itr.next()), - // // None => (col_name, None), - // // }) - // // .collect(); - - // // re-use aggregate_row vector. - // for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { - // match itr { - // Some(itr) => aggregate_row[i] = (col_name, itr.next()), - // None => aggregate_row[i] = (col_name, None), - // } - // } - - // // Lookup the group key in the hash map - if it's empty then insert - // // a place-holder for each aggregate being executed. - // let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { - // // TODO COULD BE MAP/COLLECT - // let mut agg_results: Vec<(&String, &Aggregate, Option)> = - // Vec::with_capacity(aggregates.len()); - // for (col_name, agg_type) in aggregates { - // agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option - // } - // agg_results - // }); - - // // Update aggregates - we process each row value and for each one - // // check which aggregates apply to it. - // // - // // TODO(edd): this is probably a bit of a perf suck. - // for (col_name, row_value) in &aggregate_row { - // for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in - // group_key_entry.iter_mut() - // { - // if col_name != cum_col_name { - // continue; - // } - - // // TODO(edd): remove unwrap - it should work because we are - // // tracking iteration count in loop. - // let row_value = row_value.as_ref().unwrap(); - - // match cum_agg_value { - // Some(agg) => match agg { - // column::Aggregate::Count(cum_count) => { - // *cum_count += 1; - // } - // column::Aggregate::Sum(cum_sum) => { - // *cum_sum += row_value; - // } - // }, - // None => { - // *cum_agg_value = match agg_type { - // Aggregate::Count => Some(column::Aggregate::Count(0)), - // Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), - // } - // } - // } - // } - // } processed_rows += 1; } - // println!("{:?}", hash_table.len()); + + // Emit final row + results.insert(last_group_row, cum_aggregates); + + log::debug!("{:?}", results); + // results BTreeMap::new() } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -734,7 +686,7 @@ impl Segment { } else { // In this case there are grouped values in the column with no // rows falling into time-range/predicate set. 
- println!( + log::error!( "grouped value {:?} has no rows in time-range/predicate set", group_key_value ); @@ -742,7 +694,7 @@ impl Segment { } } else { // segment doesn't have the column so can't group on it. - println!("don't have column - can't group"); + log::error!("don't have column - can't group"); } grouped_results } @@ -868,26 +820,36 @@ impl<'a> Segments<'a> { panic!("max <= min"); } + // + // TODO - just need to sum up the aggregates within each segment here to get + // the final result. + // for segment in &self.segments { - // // segment.aggregate_by_group_with_hash( - // // time_range, - // // predicates, - // // &group_columns, - // // &aggregates, - // // ); - - segment.aggregate_by_group_with_sort( + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_hash( time_range, predicates, &group_columns, &aggregates, ); + + // segment.aggregate_by_group_with_sort( + // time_range, + // predicates, + // &group_columns, + // &aggregates, + // ); + log::info!( + "processed segment {:?} in {:?}", + segment.time_range(), + now.elapsed() + ) } // let group_columns_arc = std::sync::Arc::new(group_columns); // let aggregates_arc = std::sync::Arc::new(aggregates); - // for chunked_segments in self.segments.chunks(12) { + // for chunked_segments in self.segments.chunks(16) { // crossbeam::scope(|scope| { // for segment in chunked_segments { // let group_columns = group_columns_arc.clone(); @@ -906,38 +868,20 @@ impl<'a> Segments<'a> { // .unwrap(); // } + // let rem = self.segments.len() % 16; + // for segment in &self.segments[self.segments.len() - rem..] { + // segment.aggregate_by_group_with_sort( + // time_range, + // predicates, + // &group_columns_arc.clone(), + // &aggregates_arc.clone(), + // ); + // } + + // TODO(edd): merge results - not expensive really... let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = BTreeMap::new(); - // for segment in &self.segments { - // let segment_results = segment.group_agg_by_predicate_eq( - // time_range, - // predicates, - // &group_columns, - // &aggregates, - // ); - - // for (k, segment_aggs) in segment_results { - // // assert_eq!(v.len(), aggregates.len()); - // let cum_result = cum_results.get_mut(&k); - // match cum_result { - // Some(cum) => { - // assert_eq!(cum.len(), segment_aggs.len()); - // // In this case we need to aggregate the aggregates from - // // each segment. - // for i in 0..cum.len() { - // // TODO(edd): this is more expensive than necessary - // cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); - // } - // } - // None => { - // cum_results.insert(k, segment_aggs); - // } - // } - // } - // } - - // // columns cum_results } diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs new file mode 100644 index 0000000000..dff6d9657b --- /dev/null +++ b/delorean_mem_qe/src/sorter.rs @@ -0,0 +1,197 @@ +//! The sorter module provides a sort function which will sort a collection of +//! `Packer` columns by arbitrary columns. All sorting is done in ascending +//! order. +//! +//! `sorter::sort` implements Quicksort using Hoare's partitioning scheme (how +//! you choose the pivot). This partitioning scheme typically significantly +//! reduces the number of swaps necessary but it does have some drawbacks. +//! +//! Firstly, the worse case runtime of this implementation is `O(n^2)` when the +//! input set of columns are sorted according to the desired sort order. To +//! avoid that behaviour, a heuristic is used for inputs over a certain size; +//! 
large inputs are first linearly scanned to determine if the input is already
+//! sorted.
+//!
+//! Secondly, the sort produced using this partitioning scheme is not stable.
+//!
+use std::cmp::Ordering;
+use std::collections::BTreeSet;
+use std::ops::Range;
+
+use snafu::ensure;
+use snafu::Snafu;
+
+use super::column;
+
+#[derive(Snafu, Debug, Clone, Copy, PartialEq)]
+pub enum Error {
+    #[snafu(display(r#"Too many sort columns specified"#))]
+    TooManyColumns,
+
+    #[snafu(display(r#"Same column specified as sort column multiple times"#))]
+    RepeatedColumns { index: usize },
+
+    #[snafu(display(r#"Specified column index is out of bounds"#))]
+    OutOfBoundsColumn { index: usize },
+}
+
+/// Any set of `Vector` columns with more than this many rows will have a
+/// linear comparison scan performed on them to ensure they're not already
+/// sorted.
+const SORTED_CHECK_SIZE: usize = 1000;
+
+/// Sort a slice of `column::Vector`s based on the provided column indexes.
+///
+/// All chosen columns will be sorted in ascending order; the sort is *not*
+/// stable.
+pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Error> {
+    if vectors.is_empty() || sort_by.is_empty() {
+        return Ok(());
+    }
+
+    ensure!(sort_by.len() <= vectors.len(), TooManyColumns);
+
+    let mut col_set = BTreeSet::new();
+    for &index in sort_by {
+        ensure!(col_set.insert(index), RepeatedColumns { index });
+    }
+
+    // TODO(edd): map first/last still unstable https://github.com/rust-lang/rust/issues/62924
+    if let Some(index) = col_set.range(vectors.len()..).next() {
+        return OutOfBoundsColumn { index: *index }.fail();
+    }
+
+    // Hoare's partitioning scheme can have quadratic runtime behaviour in
+    // the worst case when the inputs are already sorted. To avoid this, a
+    // check is added for large inputs.
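// Usage sketch (added for illustration; assumes `Vector::Integer` wraps a
// `Vec<i64>`, as its use elsewhere in this module suggests):
//
//   let mut cols = vec![
//       column::Vector::Integer(vec![2, 1, 1]),   // sort key 0
//       column::Vector::Integer(vec![9, 8, 7]),   // sort key 1
//       column::Vector::Integer(vec![5, 6, 4]),   // payload, co-sorted
//   ];
//   sort(&mut cols, &[0, 1]).unwrap();
//   // cols[0] is now [1, 1, 2]; rows move together across all columns.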
+ let n = vectors[0].len(); + if n > SORTED_CHECK_SIZE { + let mut sorted = true; + for i in 1..n { + if cmp(vectors, i - 1, i, sort_by) == Ordering::Greater { + sorted = false; + break; + } + } + + if sorted { + log::debug!("columns already sorted"); + return Ok(()); + } + // if vectors_sorted_asc(vectors, n, sort_by) { + // return Ok(()); + // } + } + let now = std::time::Instant::now(); + quicksort_by(vectors, 0..n - 1, sort_by); + log::debug!("sorted in {:?}", now.elapsed()); + Ok(()) +} + +fn quicksort_by(vectors: &mut [column::Vector], range: Range, sort_by: &[usize]) { + if range.start >= range.end { + return; + } + + let pivot = partition(vectors, &range, sort_by); + quicksort_by(vectors, range.start..pivot, sort_by); + quicksort_by(vectors, pivot + 1..range.end, sort_by); +} + +fn partition(vectors: &mut [column::Vector], range: &Range, sort_by: &[usize]) -> usize { + let pivot = (range.start + range.end) / 2; + let (lo, hi) = (range.start, range.end); + if cmp(vectors, pivot as usize, lo as usize, sort_by) == Ordering::Less { + swap(vectors, lo as usize, pivot as usize); + } + if cmp(vectors, hi as usize, lo as usize, sort_by) == Ordering::Less { + swap(vectors, lo as usize, hi as usize); + } + if cmp(vectors, pivot as usize, hi as usize, sort_by) == Ordering::Less { + swap(vectors, hi as usize, pivot as usize); + } + + let pivot = hi; + let mut i = range.start; + let mut j = range.end; + + loop { + while cmp(vectors, i as usize, pivot as usize, sort_by) == Ordering::Less { + i += 1; + } + + while cmp(vectors, j as usize, pivot as usize, sort_by) == Ordering::Greater { + j -= 1; + } + + if i >= j { + return j; + } + + swap(vectors, i as usize, j as usize); + i += 1; + j -= 1; + } +} + +fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ordering { + for &idx in sort_by { + match &vectors[idx] { + column::Vector::String(p) => { + let cmp = p.get(a).cmp(&p.get(b)); + if cmp != Ordering::Equal { + return cmp; + } + // if cmp equal then try next vector. + } + column::Vector::Integer(p) => { + let cmp = p.get(a).cmp(&p.get(b)); + if cmp != Ordering::Equal { + return cmp; + } + // if cmp equal then try next vector. 
+ } + _ => continue, // don't compare on non-string / timestamp cols + } + } + Ordering::Equal +} + +fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool { + 'row_wise: for i in 1..len { + for &idx in sort_by { + match &vectors[idx] { + column::Vector::String(vec) => { + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + column::Vector::Integer(vec) => { + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + _ => continue, // don't compare on non-string / timestamp cols + } + } + } + true +} + +// Swap the same pair of elements in each packer column +fn swap(vectors: &mut [column::Vector], a: usize, b: usize) { + for p in vectors { + p.swap(a, b); + } +} From 44aaddb8bd2c727f3e737f411d53ed9fad11c19a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 14 Aug 2020 13:27:53 +0100 Subject: [PATCH 25/73] refactor: tidy up query runs --- delorean_mem_qe/src/bin/main.rs | 347 ++++++++++++++++++++------------ delorean_mem_qe/src/segment.rs | 252 ++++++++++++++++++----- 2 files changed, 414 insertions(+), 185 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index fa97b6e702..d4ee92cc6f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -5,7 +5,7 @@ use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; use delorean_mem_qe::column::{Column, Scalar}; -use delorean_mem_qe::segment::{Aggregate, Segment}; +use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; use delorean_mem_qe::Store; // use snafu::ensure; @@ -38,7 +38,13 @@ fn main() { store.size(), ); - time_group_by_agg(&store); + time_select_with_pred(&store); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_SORTED_count(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -270,165 +276,242 @@ fn convert_record_batch(rb: RecordBatch) -> Result { Ok(segment) } -fn time_column_min_time(store: &Store) { - let repeat = 1000; +// +// SELECT FIRST(host) FROM measurement +// +fn time_first_host(store: &Store) { + let repeat = 100; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_min = 0; - for _ in 1..repeat { - let now = std::time::Instant::now(); - let segments = store.segments(); - let min = segments.column_min("time").unwrap(); - total_time += now.elapsed(); - - if let Scalar::Integer(v) = min { - total_min += v - } - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_min - ); -} - -fn time_column_max_time(store: &Store) { - let repeat = 1000; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; - for _ in 1..repeat { - let now = std::time::Instant::now(); - let segments = store.segments(); - let max = segments.column_max("time").unwrap(); - total_time += now.elapsed(); - - if let Scalar::Integer(v) = max { - total_max += v - } - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); -} - -fn time_column_first(store: &Store) { - let repeat = 100000; - let mut total_time: std::time::Duration = 
std::time::Duration::new(0, 0); - let mut total_max = 0; - for _ in 1..repeat { - let now = std::time::Instant::now(); - let segments = store.segments(); - let res = segments.first("host").unwrap(); - total_time += now.elapsed(); - total_max += res.0; - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); -} - -// fn time_row_by_last_ts(store: &Store) { -// let repeat = 100000; -// let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); -// let mut total_max = 0; -// let segments = store.segments(); -// for _ in 0..repeat { -// let now = std::time::Instant::now(); - -// let (_, _, row_id) = segments.last("time").unwrap(); -// let res = segments.segments().last().unwrap().row(row_id).unwrap(); -// total_time += now.elapsed(); -// total_max += res.len(); -// } -// println!( -// "Ran {:?} in {:?} {:?} / call {:?}", -// repeat, -// total_time, -// total_time / repeat, -// total_max -// ); -// } - -fn time_row_by_preds(store: &Store) { - let repeat = 100000; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; + let mut track = 0; let segments = store.segments(); for _ in 0..repeat { let now = std::time::Instant::now(); - let rows = segments - .segments() - .last() - .unwrap() - .filter_by_predicates_eq( - (1590040770000000, 1590040790000000), - &vec![ - ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - ("method", Some(&column::Scalar::String("GET"))), - ( - "host", - Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - ), - ], - ) - .unwrap(); - - // for row_id in rows.iter() { - // println!( - // "{:?} - {:?}", - // row_id, - // segments.segments().last().unwrap().row(row_id as usize) - // ); - // } + let (ts, _, _) = segments.first("host").unwrap(); total_time += now.elapsed(); - total_max += rows.cardinality(); + track += ts; } println!( - "Ran {:?} in {:?} {:?} / call {:?}", + "time_first_host ran {:?} in {:?} {:?} / call {:?}", repeat, total_time, total_time / repeat, - total_max + track ); } -fn time_group_by_agg(store: &Store) { - let repeat = 10; +// +// SELECT SUM(counter) FROM measurement +// WHERE time >= "2020-05-07 06:48:00" AND time < "2020-05-21 07:00:10" +// +fn time_sum_range(store: &Store) { + let repeat = 100; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; + let segments = store.segments(); + let mut track = 0.0; + for _ in 0..repeat { + let now = std::time::Instant::now(); + + for segment in segments.segments() { + let filtered_ids = + segment.filter_by_predicates_eq((1588834080000000, 1590044410000000), &[]); + if let Some(mut row_ids) = filtered_ids { + if let column::Scalar::Float(v) = + segment.sum_column("counter", &mut row_ids).unwrap() + { + track += v; + } + } + } + + total_time += now.elapsed(); + } + println!( + "time_sum_range ran {:?} in {:?} {:?} / total {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + +// +// SELECT COUNT(counter) FROM measurement +// WHERE time >= "2020-05-07 06:48:00" AND time < "2020-05-21 07:00:10" +// +fn time_count_range(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; let segments = store.segments(); for _ in 0..repeat { let now = std::time::Instant::now(); - let groups = segments.read_group_eq( - (0, 1590044410000000), - &[], - vec!["status".to_string(), "method".to_string()], + for segment in segments.segments() { + let 
filtered_ids = + segment.filter_by_predicates_eq((1588834080000000, 1590044410000000), &[]); + if let Some(mut row_ids) = filtered_ids { + track += segment.count_column("counter", &mut row_ids).unwrap(); + } + } + + total_time += now.elapsed(); + } + println!( + "time_count_range ran {:?} in {:?} {:?} / total {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + +// +// SELECT env, method, host, counter, time +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// AND "env" = "prod01-eu-central-1" +// +fn time_select_with_pred(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let columns = segments.read_filter_eq( + (1590036110000000, 1590040770000000), + &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], vec![ - ("counter".to_string(), Aggregate::Sum), - // ("counter".to_string(), Aggregate::Count), + "env".to_string(), + "method".to_string(), + "host".to_string(), + "counter".to_string(), + "time".to_string(), ], ); total_time += now.elapsed(); - total_max += groups.len(); + track += columns.len(); } println!( - "Ran {:?} in {:?} {:?} / call {:?}", + "time_select_with_pred ran {:?} in {:?} {:?} / call {:?}", repeat, total_time, total_time / repeat, - total_max + track ); } + +// +// SELECT env, method, host, counter, time +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// AND "env" = "prod01-eu-central-1" +// +fn time_group_single_with_pred(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + for segment in segments.segments() { + let results = segment.group_single_agg_by_predicate_eq( + (1588834080000000, 1590044410000000), + &[], + &"env".to_string(), + &vec![("counter".to_string(), Aggregate::Count)], + ); + track += results.len(); + } + + total_time += now.elapsed(); + } + println!( + "time_group_single_with_pred ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + +fn time_group_by_multi_agg_count(store: &Store) { + let strats = vec![ + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::SortGroup, + GroupingStrategy::SortGroupConcurrent, + ]; + + for strat in &strats { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["status".to_string(), "method".to_string()], + vec![("counter".to_string(), Aggregate::Count)], + strat, + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_group_by_multi_agg_count_{:?} ran {:?} in {:?} {:?} / call {:?}", + strat, + repeat, + total_time, + total_time / repeat, + total_max + ); + } +} + +fn time_group_by_multi_agg_SORTED_count(store: &Store) { + let strats = vec![ + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::SortGroup, + GroupingStrategy::SortGroupConcurrent, + ]; + + for strat in &strats { + let repeat = 10; + let mut total_time: 
std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["env".to_string(), "role".to_string()], + vec![("counter".to_string(), Aggregate::Count)], + strat, + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_group_by_multi_agg_SORTED_count_{:?} ran {:?} in {:?} {:?} / call {:?}", + strat, + repeat, + total_time, + total_time / repeat, + total_max + ); + } +} diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 8e4189982d..8f6c29b812 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -3,6 +3,9 @@ use std::collections::{BTreeMap, HashMap}; use super::column; use super::column::Column; +// Only used in a couple of specific places for experimentation. +const THREADS: usize = 16; + #[derive(Debug)] pub struct Segment { meta: SegmentMetaData, @@ -137,12 +140,11 @@ impl Segment { pub fn group_by_column_ids( &self, name: &str, - ) -> Option<&std::collections::BTreeMap, croaring::Bitmap>> { - unimplemented!("just need to convert encoded keys into decoded strings"); - // if let Some(c) = self.column(name) { - // return Some(c.group_by_ids()); - // } - // None + ) -> Option<&std::collections::BTreeMap> { + if let Some(c) = self.column(name) { + return Some(c.group_by_ids()); + } + None } pub fn aggregate_by_group_with_hash( @@ -639,7 +641,7 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_column: &String, aggregates: &Vec<(String, Aggregate)>, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -682,7 +684,7 @@ impl Segment { // Next add these aggregates to the result set, keyed // by the grouped value. assert_eq!(aggs.len(), aggregates.len()); - grouped_results.insert(vec![group_key_value.clone().unwrap()], aggs); + grouped_results.insert(*group_key_value, aggs); } else { // In this case there are grouped values in the column with no // rows falling into time-range/predicate set. @@ -814,16 +816,118 @@ impl<'a> Segments<'a> { predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, aggregates: Vec<(String, Aggregate)>, + strategy: &GroupingStrategy, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); } - // - // TODO - just need to sum up the aggregates within each segment here to get - // the final result. - // + match strategy { + GroupingStrategy::HashGroup => { + return self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + false, + ) + } + GroupingStrategy::HashGroupConcurrent => { + return self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + true, + ) + } + GroupingStrategy::SortGroup => { + return self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + false, + ) + } + GroupingStrategy::SortGroupConcurrent => { + return self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + true, + ) + } + } + + // TODO(edd): merge results - not expensive really... 
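// Note (added): every arm of the `match strategy` above returns, so the code
// below is unreachable as written; a later patch comments it out.
//
// --------------------------------------------------------------------------
// A minimal sketch (for illustration; not part of this patch) of the merge
// the TODO describes, assuming per-segment results keyed by encoded group
// rows with a single COUNT aggregate:
use std::collections::BTreeMap;

fn merge_counts(
    mut acc: BTreeMap<Vec<i64>, u64>,
    segment: BTreeMap<Vec<i64>, u64>,
) -> BTreeMap<Vec<i64>, u64> {
    for (key, count) in segment {
        // sum this segment's count for the group into the running total
        *acc.entry(key).or_insert(0) += count;
    }
    acc
}
// --------------------------------------------------------------------------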
+ let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + BTreeMap::new(); + + cum_results + } + + fn read_group_eq_hash( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, Aggregate)>, + concurrent: bool, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + if concurrent { + let group_columns_arc = std::sync::Arc::new(group_columns); + let aggregates_arc = std::sync::Arc::new(aggregates); + + for chunked_segments in self.segments.chunks(THREADS) { + crossbeam::scope(|scope| { + for segment in chunked_segments { + let group_columns = group_columns_arc.clone(); + let aggregates = aggregates_arc.clone(); + + scope.spawn(move |_| { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_hash( + time_range, + predicates, + &group_columns, + &aggregates, + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + }); + } + }) + .unwrap(); + } + + let rem = self.segments.len() % THREADS; + for segment in &self.segments[self.segments.len() - rem..] { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_hash( + time_range, + predicates, + &group_columns_arc.clone(), + &aggregates_arc.clone(), + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + } + + // TODO(edd): aggregate the aggregates. not expensive + return BTreeMap::new(); + } + + // Single threaded + for segment in &self.segments { let now = std::time::Instant::now(); segment.aggregate_by_group_with_hash( @@ -832,57 +936,91 @@ impl<'a> Segments<'a> { &group_columns, &aggregates, ); - - // segment.aggregate_by_group_with_sort( - // time_range, - // predicates, - // &group_columns, - // &aggregates, - // ); log::info!( - "processed segment {:?} in {:?}", + "processed segment {:?} using single-threaded hash-grouping in {:?}", segment.time_range(), now.elapsed() ) } - // let group_columns_arc = std::sync::Arc::new(group_columns); - // let aggregates_arc = std::sync::Arc::new(aggregates); + BTreeMap::new() + } - // for chunked_segments in self.segments.chunks(16) { - // crossbeam::scope(|scope| { - // for segment in chunked_segments { - // let group_columns = group_columns_arc.clone(); - // let aggregates = aggregates_arc.clone(); + fn read_group_eq_sort( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, Aggregate)>, + concurrent: bool, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + if concurrent { + let group_columns_arc = std::sync::Arc::new(group_columns); + let aggregates_arc = std::sync::Arc::new(aggregates); - // scope.spawn(move |_| { - // segment.aggregate_by_group_with_sort( - // time_range, - // predicates, - // &group_columns, - // &aggregates, - // ); - // }); - // } - // }) - // .unwrap(); - // } + for chunked_segments in self.segments.chunks(THREADS) { + crossbeam::scope(|scope| { + for segment in chunked_segments { + let group_columns = group_columns_arc.clone(); + let aggregates = aggregates_arc.clone(); - // let rem = self.segments.len() % 16; - // for segment in &self.segments[self.segments.len() - rem..] 
{ - // segment.aggregate_by_group_with_sort( - // time_range, - // predicates, - // &group_columns_arc.clone(), - // &aggregates_arc.clone(), - // ); - // } + scope.spawn(move |_| { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + }); + } + }) + .unwrap(); + } - // TODO(edd): merge results - not expensive really... - let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - BTreeMap::new(); + let rem = self.segments.len() % THREADS; + for segment in &self.segments[self.segments.len() - rem..] { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns_arc.clone(), + &aggregates_arc.clone(), + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + } - cum_results + // TODO(edd): aggregate the aggregates. not expensive + return BTreeMap::new(); + } + + // Single threaded + + for segment in &self.segments { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + ); + log::info!( + "processed segment {:?} using single-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + } + + BTreeMap::new() } /// Returns the minimum value for a column in a set of segments. @@ -936,7 +1074,7 @@ impl<'a> Segments<'a> { /// /// TODO(edd): could return NULL value.. pub fn first(&self, column_name: &str) -> Option<(i64, Option, usize)> { - // First let's find the segment with the latest time range. + // First let's find the segment with the earliest time range. // notice we order a < b on max time range. 
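// Sketch (added for illustration; assumes `time_range()` returns a
// `(min, max)` pair): the same selection written with `min_by_key`, ordering
// on the range maximum as the comment above describes:
//
//   let earliest = self.segments.iter().min_by_key(|s| s.time_range().1);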
let segment = self .segments @@ -992,5 +1130,13 @@ impl<'a> Segments<'a> { } } +#[derive(Debug)] +pub enum GroupingStrategy { + HashGroup, + HashGroupConcurrent, + SortGroup, + SortGroupConcurrent, +} + #[cfg(test)] mod test {} From 7f815099d051b562d08a4dbd69d3a82f210995b2 Mon Sep 17 00:00:00 2001 From: alamb Date: Tue, 18 Aug 2020 14:10:18 -0400 Subject: [PATCH 26/73] feat: Read from parquet rather than arrow --- Cargo.lock | 1 + delorean_mem_qe/Cargo.toml | 4 ++-- delorean_mem_qe/src/bin/main.rs | 26 +++++++++++++++++--------- delorean_mem_qe/src/column.rs | 12 ++++++------ delorean_mem_qe/src/encoding.rs | 2 +- delorean_mem_qe/src/segment.rs | 6 +++--- delorean_mem_qe/src/sorter.rs | 1 + delorean_table/src/sorter.rs | 1 + 8 files changed, 32 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fad93f1f3e..53b07ad87f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -771,6 +771,7 @@ dependencies = [ "delorean_table", "env_logger", "log", + "parquet 2.0.0-SNAPSHOT", "snafu", ] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 89b9ff1e9a..d569025cef 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -9,7 +9,8 @@ edition = "2018" [dependencies] delorean_table = { path = "../delorean_table" } -arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" @@ -18,4 +19,3 @@ log = "0.4.11" env_logger = "0.7.1" [dev-dependencies] - diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d4ee92cc6f..9ee787ac8c 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,12 +1,13 @@ -use std::{fs::File, path::Path}; +use std::{fs::File, rc::Rc}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; -use arrow::{array, array::Array, datatypes, ipc}; +use arrow::{array, array::Array, datatypes}; use delorean_mem_qe::column; -use delorean_mem_qe::column::{Column, Scalar}; +use delorean_mem_qe::column::{Column}; use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; use delorean_mem_qe::Store; +use parquet::arrow::arrow_reader::ArrowReader; // use snafu::ensure; use snafu::Snafu; @@ -26,11 +27,18 @@ pub enum Error { fn main() { env_logger::init(); - let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + println!("Opening the file...."); + let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet").unwrap(); + let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); + let mut reader = parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let batch_size = 60000; + let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); + + //let reader = ipc::reader::StreamReader::try_new(r).unwrap(); let mut store = 
Store::default(); - build_store(reader, &mut store).unwrap(); + build_store(record_batch_reader, &mut store).unwrap(); println!( "total segments {:?} with total size {:?}", @@ -44,7 +52,7 @@ fn main() { time_count_range(&store); time_group_single_with_pred(&store); time_group_by_multi_agg_count(&store); - time_group_by_multi_agg_SORTED_count(&store); + time_group_by_multi_agg_sorted_count(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -164,7 +172,7 @@ fn main() { } fn build_store( - mut reader: arrow::ipc::reader::StreamReader, + mut reader: impl RecordBatchReader, store: &mut Store, ) -> Result<(), Error> { // let mut i = 0; @@ -478,7 +486,7 @@ fn time_group_by_multi_agg_count(store: &Store) { } } -fn time_group_by_multi_agg_SORTED_count(store: &Store) { +fn time_group_by_multi_agg_sorted_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, GroupingStrategy::HashGroupConcurrent, diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 144c2bd6ec..e8bd8bd700 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -12,7 +12,7 @@ pub enum Scalar<'a> { impl<'a> Scalar<'a> { pub fn reset(&mut self) { match self { - Scalar::String(s) => { + Scalar::String(_s) => { panic!("not supported"); } Scalar::Float(v) => { @@ -509,10 +509,10 @@ impl Column { // FIX THIS UNWRAP AND HOPE THERE ARE NO NULL VALUES! c.decode_id(encoded_id).unwrap() } - Column::Float(c) => { + Column::Float(_c) => { unreachable!("this isn't supported right now"); } - Column::Integer(c) => { + Column::Integer(_c) => { unreachable!("this isn't supported right now"); } } @@ -542,7 +542,7 @@ impl Column { } /// materialise all rows including and after row_id - pub fn scan_from(&self, row_id: usize) -> Option { + pub fn scan_from(&self, _row_id: usize) -> Option { unimplemented!("todo"); // if row_id >= self.num_rows() { // println!( @@ -764,7 +764,7 @@ impl Column { // WHERE counter >= 102.2 AND counter < 2929.32 pub fn row_ids_gte_lt(&self, low: &Scalar, high: &Scalar) -> Option { match self { - Column::String(c) => { + Column::String(_c) => { unimplemented!("not implemented yet"); } Column::Float(c) => { @@ -926,7 +926,7 @@ impl String { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, row_id: usize) -> Option<&std::string::String> { + pub fn scan_from_until_some(&self, _row_id: usize) -> Option<&std::string::String> { unreachable!("don't need this"); // self.data.scan_from_until_some(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 4cec3b25f6..6d9ce4e4f8 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -68,7 +68,7 @@ where } // TODO(edd): fix this when added NULL support - pub fn scan_from_until_some(&self, row_id: usize) -> Option { + pub fn scan_from_until_some(&self, _row_id: usize) -> Option { unreachable!("to remove"); // for v in self.values.iter().skip(row_id) { // return Some(*v); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 8f6c29b812..2bbbdea2f9 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -863,10 +863,10 @@ impl<'a> Segments<'a> { } // TODO(edd): merge results - not expensive really... 
- let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - BTreeMap::new(); + // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + // BTreeMap::new(); - cum_results + // cum_results } fn read_group_eq_hash( diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index dff6d9657b..c8b01a3432 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -156,6 +156,7 @@ fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ord Ordering::Equal } +#[allow(dead_code)] fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool { 'row_wise: for i in 1..len { for &idx in sort_by { diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index a78be80bc0..7911b1b310 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -167,6 +167,7 @@ fn cmp(packers: &[Packers], a: usize, b: usize, sort_by: &[usize]) -> Ordering { Ordering::Equal } +#[allow(dead_code)] fn packers_sorted_asc(packers: &[Packers], len: usize, sort_by: &[usize]) -> bool { 'row_wise: for i in 1..len { for &idx in sort_by { From acfef35a0e71a90cbe1d6aba9da2cec917ac9f90 Mon Sep 17 00:00:00 2001 From: alamb Date: Wed, 19 Aug 2020 12:55:09 -0400 Subject: [PATCH 27/73] feat: load segments from parquet --- Cargo.lock | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53b07ad87f..ec0e71d15c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -653,7 +653,7 @@ dependencies = [ "crossbeam", "fnv", "num_cpus", - "parquet", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "paste", "rustyline", "sqlparser", @@ -716,7 +716,7 @@ version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "datafusion", - "parquet", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", ] [[package]] @@ -771,7 +771,7 @@ dependencies = [ "delorean_table", "env_logger", "log", - "parquet 2.0.0-SNAPSHOT", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "snafu", ] @@ -2048,6 +2048,25 @@ dependencies = [ "zstd", ] +[[package]] +name = "parquet" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "brotli", + "byteorder", + "chrono", + "flate2", + "lz4", + "num-bigint 0.3.0", + "parquet-format", + "serde_json", + "snap", + "thrift", + "zstd", +] + [[package]] name = "parquet-format" version = "2.6.1" From 820277a52983b1c168cacb1332354c9b70e2b532 Mon Sep 17 00:00:00 2001 From: alamb Date: Wed, 19 Aug 2020 12:55:09 -0400 Subject: [PATCH 28/73] feat: load segments from parquet --- Cargo.lock | 19 ++++++++++ delorean_mem_qe/Cargo.toml | 9 ++++- delorean_mem_qe/src/bin/main.rs | 66 +++++++++++++++++++++++++-------- delorean_mem_qe/src/column.rs | 2 +- delorean_mem_qe/src/lib.rs | 3 +- delorean_mem_qe/src/segment.rs | 1 + 6 files changed, 80 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec0e71d15c..d2c35bfd59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,24 @@ dependencies = [ 
"serde_json", ] +[[package]] +name = "arrow" +version = "2.0.0-SNAPSHOT" +dependencies = [ + "chrono", + "csv", + "flatbuffers", + "hex", + "indexmap", + "lazy_static", + "num 0.3.0", + "rand", + "regex", + "serde", + "serde_derive", + "serde_json", +] + [[package]] name = "arrow-flight" version = "2.0.0-SNAPSHOT" @@ -770,6 +788,7 @@ dependencies = [ "crossbeam", "delorean_table", "env_logger", + "human_format", "log", "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "snafu", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index d569025cef..e5024b7932 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -9,13 +9,18 @@ edition = "2018" [dependencies] delorean_table = { path = "../delorean_table" } -arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +#arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } +#parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" chrono = "0.4" log = "0.4.11" env_logger = "0.7.1" +human_format = "1.0.3" + + [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9ee787ac8c..02d9752db3 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,4 +1,4 @@ -use std::{fs::File, rc::Rc}; +use std::{fs, fs::File, rc::Rc, path::PathBuf}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes}; @@ -11,6 +11,7 @@ use parquet::arrow::arrow_reader::ArrowReader; // use snafu::ensure; use snafu::Snafu; +use datatypes::TimeUnit; #[derive(Snafu, Debug, Clone, Copy, PartialEq)] pub enum Error { @@ -24,12 +25,23 @@ pub enum Error { // OutOfBoundsColumn { index: usize }, } +fn format_size(sz: usize) -> String { + human_format::Formatter::new().format(sz as f64) +} + + fn main() { env_logger::init(); //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - println!("Opening the file...."); - let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet").unwrap(); + // This one was having issues being read into arrow with the last row groups + let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); + let r = File::open(&path).unwrap(); + let file_size = fs::metadata(&path).expect("read metadata").len(); + println!("Reading {} ({}) bytes of parquet from {:?}....", + format_size(file_size as usize), file_size, path); + + //let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000095062-000000006/http_api_requests_total.parquet").unwrap(); let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); let mut reader = 
parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); let batch_size = 60000; @@ -41,9 +53,10 @@ fn main() { build_store(record_batch_reader, &mut store).unwrap(); println!( - "total segments {:?} with total size {:?}", + "total segments {:?} with total size {} ({})", store.segment_total(), - store.size(), + format_size(store.size()), + store.size() ); time_select_with_pred(&store); @@ -175,22 +188,34 @@ fn build_store( mut reader: impl RecordBatchReader, store: &mut Store, ) -> Result<(), Error> { - // let mut i = 0; - while let Some(rb) = reader.next_batch().unwrap() { - // if i < 363 { - // i += 1; - // continue; - // } - let segment = convert_record_batch(rb)?; - store.add_segment(segment); + let mut total_rows_read = 0; + let start = std::time::Instant::now(); + loop { + let rb = reader.next_batch(); + match rb { + Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), + Ok(Some(rb)) => { + // if i < 363 { + // i += 1; + // continue; + // } + total_rows_read += rb.num_rows(); + let segment = convert_record_batch(rb)?; + store.add_segment(segment); + }, + Ok(None) => { + let now = std::time::Instant::now(); + println!("Completed loading {} rows in {:?}", total_rows_read, now - start); + return Ok(()) + } + } } - Ok(()) } fn convert_record_batch(rb: RecordBatch) -> Result { let mut segment = Segment::new(rb.num_rows()); - // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); + println!("Loading record batch: cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); for (i, column) in rb.columns().iter().enumerate() { match *column.data_type() { datatypes::DataType::Float64 => { @@ -214,6 +239,15 @@ fn convert_record_batch(rb: RecordBatch) -> Result { let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } + datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => { + if column.null_count() > 0 { + panic!("null times"); + } + let arr = column.as_any().downcast_ref::().unwrap(); + + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), column); + } datatypes::DataType::Utf8 => { let arr = column .as_any() @@ -278,7 +312,7 @@ fn convert_record_batch(rb: RecordBatch) -> Result { datatypes::DataType::Boolean => { panic!("unsupported"); } - _ => panic!("unsupported datatype"), + ref d @ _ => panic!("unsupported datatype: {:?}", d), } } Ok(segment) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index e8bd8bd700..a2ac7e83c2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -310,7 +310,7 @@ impl Column { } } - // Returns the size of the segment in bytes. + // Returns the size of the column in bytes. pub fn size(&self) -> usize { match self { Column::String(c) => c.size(), diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 150e5ad588..5fa99ebb1a 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -9,6 +9,7 @@ use segment::{Segment, Segments}; pub struct Store { segments: Vec, + /// Total size of the store, in bytes store_size: usize, } @@ -18,7 +19,7 @@ impl Store { self.segments.push(segment); } - /// The total size of all segments in the store.s + /// The total size of all segments in the store, in bytes. 
pub fn size(&self) -> usize { self.store_size } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 2bbbdea2f9..079b8b3dcd 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -71,6 +71,7 @@ impl Segment { } // TODO - iterator.... + /// Returns the size of the segment in bytes. pub fn size(&self) -> usize { let mut size = 0; for c in &self.columns { From 41899203d9e8ba21a555965b81cd4c946f35a4a9 Mon Sep 17 00:00:00 2001 From: alamb Date: Wed, 19 Aug 2020 13:52:27 -0400 Subject: [PATCH 29/73] refactor: implement a prototype datafusion integration layer demonstration --- Cargo.lock | 2 + delorean_mem_qe/Cargo.toml | 2 + delorean_mem_qe/src/adapter.rs | 333 ++++++++++++++++++++++++++++++++ delorean_mem_qe/src/bin/main.rs | 95 +++++++-- delorean_mem_qe/src/lib.rs | 11 ++ delorean_mem_qe/src/segment.rs | 17 +- 6 files changed, 436 insertions(+), 24 deletions(-) create mode 100644 delorean_mem_qe/src/adapter.rs diff --git a/Cargo.lock b/Cargo.lock index d2c35bfd59..e25b863d24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,6 +136,7 @@ dependencies = [ "indexmap", "lazy_static", "num 0.3.0", + "prettytable-rs", "rand", "regex", "serde", @@ -786,6 +787,7 @@ dependencies = [ "chrono", "croaring", "crossbeam", + "datafusion", "delorean_table", "env_logger", "human_format", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index e5024b7932..1b79c94649 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -13,6 +13,8 @@ delorean_table = { path = "../delorean_table" } arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } #parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } +#datafusion = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs new file mode 100644 index 0000000000..90138cbd29 --- /dev/null +++ b/delorean_mem_qe/src/adapter.rs @@ -0,0 +1,333 @@ +//! 
Code for interfacing and running queries in DataFusion
+
+use crate::Store;
+use arrow::{
+    datatypes::{Schema, SchemaRef},
+    util::pretty, record_batch::{RecordBatch, RecordBatchReader},
+};
+use datafusion::prelude::*;
+use datafusion::{
+    datasource::TableProvider,
+    execution::{
+        context::ExecutionContextState,
+        physical_plan::{ExecutionPlan, Partition, common::RecordBatchIterator},
+    },
+    logicalplan::{make_logical_plan_node, Expr, LogicalPlan},
+    lp::LogicalPlanNode,
+    optimizer::utils,
+};
+
+use std::{
+    fmt,
+    sync::{Arc, Mutex},
+};
+use crate::column;
+
+/// Wrapper to adapt a Store to a DataFusion "TableProvider" --
+/// eventually we could also implement this directly on Store
+pub struct StoreTableSource {
+    store: Arc<Store>,
+}
+
+impl<'a> StoreTableSource {
+    pub fn new(store: Arc<Store>) -> Self {
+        Self { store }
+    }
+}
+
+impl TableProvider for StoreTableSource {
+    /// Get a reference to the schema for this table
+    fn schema(&self) -> SchemaRef {
+        self.store.schema()
+    }
+
+    /// Perform a scan of a table and return a sequence of iterators over the data (one
+    /// iterator per partition)
+    fn scan(
+        &self,
+        _projection: &Option<Vec<usize>>,
+        _batch_size: usize,
+    ) -> datafusion::error::Result<Vec<Arc<dyn Partition>>> {
+        unimplemented!("scan not yet implemented");
+    }
+}
+
+/// Prototype of what a Delorean query engine built on top of
+/// DataFusion, but using specialized column store operators, might
+/// look like.
+///
+/// Data from the Segments in the `store` is visible in DataFusion
+/// as a table ("measurement") in this prototype.
+pub struct DeloreanQueryEngine {
+    ctx: ExecutionContext,
+    store: Arc<Store>,
+}
+
+impl DeloreanQueryEngine {
+    pub fn new(store: Arc<Store>) -> Self {
+        let start = std::time::Instant::now();
+        let mut ctx = ExecutionContext::new();
+        let source = StoreTableSource::new(store.clone());
+        let source = Box::new(source);
+        ctx.register_table("measurement", source);
+        println!("Completed setup in {:?}", start.elapsed());
+        DeloreanQueryEngine { ctx, store }
+    }
+
+    // Run the specified SQL and return the number of records matched
+    pub fn run_sql(&mut self, sql: &str) -> usize {
+        let plan = self
+            .ctx
+            .create_logical_plan(sql)
+            .expect("Creating the logical plan");
+
+        //println!("Created logical plan:\n{:?}", plan);
+        let plan = self.rewrite_to_segment_scan(&plan);
+        //println!("Rewritten logical plan:\n{:?}", plan);
+
+        match self.ctx.collect_plan(&plan) {
+            Err(err) => {
+                println!("Error running query: {:?}", err);
+                0
+            }
+            Ok(results) => {
+                if results.is_empty() {
+                    //println!("Empty result returned");
+                    0
+                } else {
+                    pretty::print_batches(&results).expect("printing");
+                    results.iter().map(|b| b.num_rows()).sum()
+                }
+            }
+        }
+    }
+
+    /// Specialized optimizer pass that combines a `TableScan` and a `Filter`
+    /// together into a SegmentScan carrying the predicates.
+    ///
+    /// For example, given this input:
+    ///
+    /// Projection: #env, #method, #host, #counter, #time
+    ///   Filter: #time GtEq Int64(1590036110000000)
+    ///     TableScan: measurement projection=None
+    ///
+    /// The following plan would be produced
+    /// Projection: #env, #method, #host, #counter, #time
+    ///   SegmentScan: measurement projection=None predicate=: #time GtEq Int64(1590036110000000)
+    ///
+    fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan {
+        if let LogicalPlan::Filter { predicate, input } = plan {
+            // see if the input is a TableScan
+            if let LogicalPlan::TableScan { ..
} = **input
+            {
+                return make_logical_plan_node(Box::new(SegmentScan::new(
+                    self.store.clone(),
+                    predicate.clone(),
+                )));
+            }
+        }
+
+        // otherwise recursively apply
+        let optimized_inputs = utils::inputs(&plan)
+            .iter()
+            .map(|input| self.rewrite_to_segment_scan(input))
+            .collect();
+
+        return utils::from_plan(plan, &utils::expressions(plan), &optimized_inputs)
+            .expect("Created plan");
+    }
+}
+
+/// LogicalPlan node that serves as a scan of the segment store with optional predicates
+struct SegmentScan {
+    /// The underlying Store
+    store: Arc<Store>,
+
+    schema: SchemaRef,
+
+    /// The predicate to apply during the scan
+    predicate: Expr,
+}
+
+impl SegmentScan {
+    fn new(store: Arc<Store>, predicate: Expr) -> Self {
+        let schema = store.schema().clone();
+
+        SegmentScan {
+            store,
+            schema,
+            predicate,
+        }
+    }
+
+}
+
+impl LogicalPlanNode for SegmentScan {
+    /// Return a reference to the logical plan's inputs
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        Vec::new()
+    }
+
+    /// Get a reference to the logical plan's schema
+    fn schema(&self) -> &Schema {
+        self.schema.as_ref()
+    }
+
+    /// Returns all expressions (non-recursively) in the current logical plan node.
+    fn expressions(&self) -> Vec<Expr> {
+        // The predicate expression gets absorbed by this node. As
+        // there are no inputs, there are no exprs that operate on
+        // inputs
+        Vec::new()
+    }
+
+    /// Write a single line human readable string to `f` for use in explain plan
+    fn format_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "SegmentScan: {:?} predicate {:?}",
+            self.store.as_ref() as *const Store,
+            self.predicate
+        )
+    }
+
+    /// Create a clone of this node.
+    ///
+    /// Note std::Clone needs a Sized type, so we must implement a
+    /// clone that creates a node with a known Size (i.e. Box)
+    //
+    fn dyn_clone(&self) -> Box<dyn LogicalPlanNode> {
+        Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone()))
+    }
+
+    /// Create a clone of this LogicalPlanNode with inputs and expressions replaced.
+    ///
+    /// Note that exprs and inputs are in the same order as the result
+    /// of self.inputs and self.exprs.
+    ///
+    /// So, clone_from_template(exprs).exprs() == exprs
+    fn clone_from_template(
+        &self,
+        exprs: &Vec<Expr>,
+        inputs: &Vec<LogicalPlan>,
+    ) -> Box<dyn LogicalPlanNode> {
+        assert_eq!(exprs.len(), 0, "no exprs expected");
+        assert_eq!(inputs.len(), 0, "no inputs expected");
+        Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone()))
+    }
+
+    /// Create the corresponding physical plan for this node
+    fn create_physical_plan(
+        &self,
+        input_physical_plans: Vec<Arc<dyn ExecutionPlan>>,
+        _ctx_state: Arc<Mutex<ExecutionContextState>>,
+    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
+        assert_eq!(input_physical_plans.len(), 0, "Can not have inputs");
+
+        // If this were real code, we would now programmatically
+        // transform the DataFusion Expr into the specific form needed
+        // by the Segment. 
However, to save prototype time we just + // hard code it here instead + assert_eq!( + format!("{:?}", self.predicate), + "CAST(#time AS Int64) GtEq Int64(1590036110000000) And CAST(#time AS Int64) Lt Int64(1590040770000000) And #env Eq Utf8(\"prod01-eu-central-1\")" + ); + + let time_range = (1590036110000000, 1590040770000000); + let string_predicate = StringPredicate { + col_name: "env".into(), + value: "prod01-eu-central-1".into() + }; + + Ok(Arc::new(SegmentScanExec::new( + self.store.clone(), + time_range, + string_predicate, + ))) + } + +} + + +#[derive(Debug, Clone)] +struct StringPredicate { + col_name: String, + value: String, +} + +/// StoreScan execution node +#[derive(Debug)] +pub struct SegmentScanExec { + store: Arc, + + // Specialized predicates to apply + time_range: (i64, i64), + string_predicate: StringPredicate + +} + +impl SegmentScanExec { + fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { + SegmentScanExec { store , time_range, string_predicate } + } +} + +impl ExecutionPlan for SegmentScanExec { + fn schema(&self) -> SchemaRef { + self.store.schema() + } + + fn partitions(&self) -> datafusion::error::Result>> { + let store = self.store.clone(); + Ok(vec![Arc::new(SegmentPartition{ + store, + time_range: self.time_range, + string_predicate: self.string_predicate.clone(), + })]) + } +} + +#[derive(Debug)] +struct SegmentPartition { + store: Arc, + time_range: (i64, i64), + string_predicate: StringPredicate, + +} + +impl Partition for SegmentPartition { + fn execute(&self) -> datafusion::error::Result>> { + let combined_results: Vec> = vec![]; + + let segments = self.store.segments(); + + // prepare the string predicates in the manner Segments want them + let col_name = &self.string_predicate.col_name; + let scalar = column::Scalar::String(&self.string_predicate.value); + + + // Here + let _columns = segments.read_filter_eq( + self.time_range, + &[(col_name, Some(&scalar))], + vec![ + "env".to_string(), + "method".to_string(), + "host".to_string(), + "counter".to_string(), + "time".to_string(), + ], + ); + + // If we were implementing this for real, we would not convert + // `columns` into RecordBatches and feed them back out + + + Ok(Arc::new(Mutex::new(RecordBatchIterator::new( + self.store.schema().clone(), + combined_results, + )))) + + + } +} diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 02d9752db3..d371cf87e1 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,17 +1,17 @@ -use std::{fs, fs::File, rc::Rc, path::PathBuf}; +use std::{fs, fs::File, path::PathBuf, rc::Rc, sync::Arc}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes}; use delorean_mem_qe::column; -use delorean_mem_qe::column::{Column}; +use delorean_mem_qe::column::Column; use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; -use delorean_mem_qe::Store; +use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; use parquet::arrow::arrow_reader::ArrowReader; // use snafu::ensure; -use snafu::Snafu; use datatypes::TimeUnit; +use snafu::Snafu; #[derive(Snafu, Debug, Clone, Copy, PartialEq)] pub enum Error { @@ -29,21 +29,29 @@ fn format_size(sz: usize) -> String { human_format::Formatter::new().format(sz as f64) } - fn main() { env_logger::init(); //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - // This one was having 
issues being read into arrow with the last row groups - let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); + + //let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); + + // smaller file to test with + let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000068644-000000002/http_api_requests_total.parquet"); + let r = File::open(&path).unwrap(); let file_size = fs::metadata(&path).expect("read metadata").len(); - println!("Reading {} ({}) bytes of parquet from {:?}....", - format_size(file_size as usize), file_size, path); + println!( + "Reading {} ({}) bytes of parquet from {:?}....", + format_size(file_size as usize), + file_size, + path + ); //let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000095062-000000006/http_api_requests_total.parquet").unwrap(); let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); - let mut reader = parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let mut reader = + parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); let batch_size = 60000; let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); @@ -58,8 +66,10 @@ fn main() { format_size(store.size()), store.size() ); + let store = Arc::new(store); time_select_with_pred(&store); + time_datafusion_select_with_pred(store.clone()); time_first_host(&store); time_sum_range(&store); time_count_range(&store); @@ -184,10 +194,7 @@ fn main() { // } } -fn build_store( - mut reader: impl RecordBatchReader, - store: &mut Store, -) -> Result<(), Error> { +fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); loop { @@ -202,20 +209,28 @@ fn build_store( total_rows_read += rb.num_rows(); let segment = convert_record_batch(rb)?; store.add_segment(segment); - }, + } Ok(None) => { let now = std::time::Instant::now(); - println!("Completed loading {} rows in {:?}", total_rows_read, now - start); - return Ok(()) + println!( + "Completed loading {} rows in {:?}", + total_rows_read, + now - start + ); + return Ok(()); } } } } fn convert_record_batch(rb: RecordBatch) -> Result { - let mut segment = Segment::new(rb.num_rows()); + let mut segment = Segment::new(rb.num_rows(), rb.schema().clone()); - println!("Loading record batch: cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); + println!( + "Loading record batch: cols {:?} rows {:?}", + rb.num_columns(), + rb.num_rows() + ); for (i, column) in rb.columns().iter().enumerate() { match *column.data_type() { datatypes::DataType::Float64 => { @@ -243,7 +258,10 @@ fn convert_record_batch(rb: RecordBatch) -> Result { if column.null_count() > 0 { panic!("null times"); } - let arr = column.as_any().downcast_ref::().unwrap(); + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); @@ -414,7 +432,6 @@ fn time_count_range(store: &Store) { // FROM measurement // WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" // AND "env" = "prod01-eu-central-1" -// fn time_select_with_pred(store: &Store) { let 
repeat = 100; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); @@ -447,6 +464,42 @@ fn time_select_with_pred(store: &Store) { ); } +/// DataFusion implementation of +// +// SELECT env, method, host, counter, time +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// AND "env" = "prod01-eu-central-1" +// +// Use the hard coded timestamp values 1590036110000000, 1590040770000000 + +fn time_datafusion_select_with_pred(store: Arc) { + let mut query_engine = DeloreanQueryEngine::new(store); + + let sql_string = r#"SELECT env, method, host, counter, time + FROM measurement + WHERE time::BIGINT >= 1590036110000000 + AND time::BIGINT < 1590040770000000 + AND env = 'prod01-eu-central-1' + "#; + + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + for _ in 0..repeat { + let now = std::time::Instant::now(); + track += query_engine.run_sql(&sql_string); + total_time += now.elapsed(); + } + println!( + "time_datafusion_select_with_pred ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + // // SELECT env, method, host, counter, time // FROM measurement diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 5fa99ebb1a..c8617e1cf0 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,8 +1,10 @@ +pub mod adapter; pub mod column; pub mod encoding; pub mod segment; pub mod sorter; +use arrow::datatypes::SchemaRef; use segment::{Segment, Segments}; #[derive(Debug, Default)] @@ -31,4 +33,13 @@ impl Store { pub fn segments(&self) -> Segments { Segments::new(self.segments.iter().collect::>()) } + + pub fn schema(&self) -> SchemaRef { + assert!( + !self.segments.is_empty(), + "Need to have at least one segment in a store" + ); + // assume all segments have the same schema + self.segments[0].schema() + } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 079b8b3dcd..8dee09d5f4 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap}; use super::column; use super::column::Column; +use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. 
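// NOTE(editor): a rough sketch, not from this patch, of the fan-out shape that
// the THREADS constant below caps. It assumes crossbeam's scoped threads are
// what drive it (crossbeam is already a dependency); each thread works on a
// disjoint chunk of the input, and `work` is a hypothetical per-chunk job.
fn fan_out<T: Sync>(items: &[T], threads: usize, work: impl Fn(&[T]) + Sync) {
    // ceil-divide so every item lands in exactly one chunk
    let chunk_size = ((items.len() + threads - 1) / threads.max(1)).max(1);
    let work = &work;
    crossbeam::scope(|scope| {
        for part in items.chunks(chunk_size) {
            // each scoped thread processes its own slice
            scope.spawn(move |_| work(part));
        }
    })
    .unwrap();
}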
const THREADS: usize = 16; @@ -16,9 +17,9 @@ pub struct Segment { } impl Segment { - pub fn new(rows: usize) -> Self { + pub fn new(rows: usize, schema: SchemaRef) -> Self { Self { - meta: SegmentMetaData::new(rows), + meta: SegmentMetaData::new(rows, schema), columns: vec![], time_column_idx: 0, } @@ -44,6 +45,10 @@ impl Segment { self.meta.time_range } + pub fn schema(&self) -> SchemaRef { + self.meta.schema() + } + pub fn add_column(&mut self, name: &str, c: column::Column) { assert_eq!( self.meta.rows, @@ -709,6 +714,7 @@ impl Segment { pub struct SegmentMetaData { size: usize, // TODO rows: usize, + schema: SchemaRef, column_names: Vec, time_range: (i64, i64), @@ -719,10 +725,11 @@ pub struct SegmentMetaData { } impl SegmentMetaData { - pub fn new(rows: usize) -> Self { + pub fn new(rows: usize, schema: SchemaRef) -> Self { let mut meta = Self { size: 0, rows, + schema, column_names: vec![], time_range: (0, 0), row_ids: croaring::Bitmap::create_with_capacity(rows as u32), @@ -731,6 +738,10 @@ impl SegmentMetaData { meta } + pub fn schema(&self) -> SchemaRef { + self.schema.clone() + } + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { self.time_range.0 <= to && from <= self.time_range.1 } From 54e9d38589d57cf3ad212c9e1f35803d94fbbd90 Mon Sep 17 00:00:00 2001 From: alamb Date: Thu, 20 Aug 2020 20:51:26 -0400 Subject: [PATCH 30/73] chore: update the refs to github --- Cargo.lock | 3 ++- delorean_mem_qe/Cargo.toml | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e25b863d24..ed5f18fdfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,7 +109,6 @@ name = "arrow" version = "2.0.0-SNAPSHOT" source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" dependencies = [ - "arrow-flight", "chrono", "csv", "flatbuffers", @@ -128,7 +127,9 @@ dependencies = [ [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" dependencies = [ + "arrow-flight", "chrono", "csv", "flatbuffers", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 1b79c94649..c2086ac66f 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -9,12 +9,12 @@ edition = "2018" [dependencies] delorean_table = { path = "../delorean_table" } -#arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } -#parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } -#datafusion = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } +arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } +parquet = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } +datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } +#arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } +#parquet = { path = 
"/Users/alamb/Software/arrow/rust/parquet" } +#datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" From 957ff79e2f76c15fda19ac3a77785b3c0432bde8 Mon Sep 17 00:00:00 2001 From: alamb Date: Fri, 21 Aug 2020 11:04:03 -0400 Subject: [PATCH 31/73] docs: add additional documentation for sets of test parameters --- delorean_mem_qe/src/adapter.rs | 35 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs index 90138cbd29..0fcb5cb34a 100644 --- a/delorean_mem_qe/src/adapter.rs +++ b/delorean_mem_qe/src/adapter.rs @@ -3,25 +3,26 @@ use crate::Store; use arrow::{ datatypes::{Schema, SchemaRef}, - util::pretty, record_batch::{RecordBatch, RecordBatchReader}, + record_batch::{RecordBatch, RecordBatchReader}, + util::pretty, }; use datafusion::prelude::*; use datafusion::{ datasource::TableProvider, execution::{ context::ExecutionContextState, - physical_plan::{ExecutionPlan, Partition, common::RecordBatchIterator}, + physical_plan::{common::RecordBatchIterator, ExecutionPlan, Partition}, }, logicalplan::{make_logical_plan_node, Expr, LogicalPlan}, lp::LogicalPlanNode, optimizer::utils, }; +use crate::column; use std::{ fmt, sync::{Arc, Mutex}, }; -use crate::column; /// Wrapper to adapt a Store to a DataFusion "TableProvider" -- /// eventually we could also implement this directly on Store @@ -118,8 +119,7 @@ impl DeloreanQueryEngine { fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan { if let LogicalPlan::Filter { predicate, input } = plan { // see if the input is a TableScan - if let LogicalPlan::TableScan { .. } = **input - { + if let LogicalPlan::TableScan { .. 
} = **input { return make_logical_plan_node(Box::new(SegmentScan::new( self.store.clone(), predicate.clone(), @@ -159,7 +159,6 @@ impl SegmentScan { predicate, } } - } impl LogicalPlanNode for SegmentScan { @@ -236,7 +235,7 @@ impl LogicalPlanNode for SegmentScan { let time_range = (1590036110000000, 1590040770000000); let string_predicate = StringPredicate { col_name: "env".into(), - value: "prod01-eu-central-1".into() + value: "prod01-eu-central-1".into(), }; Ok(Arc::new(SegmentScanExec::new( @@ -245,10 +244,8 @@ impl LogicalPlanNode for SegmentScan { string_predicate, ))) } - } - #[derive(Debug, Clone)] struct StringPredicate { col_name: String, @@ -262,13 +259,16 @@ pub struct SegmentScanExec { // Specialized predicates to apply time_range: (i64, i64), - string_predicate: StringPredicate - + string_predicate: StringPredicate, } impl SegmentScanExec { fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { - SegmentScanExec { store , time_range, string_predicate } + SegmentScanExec { + store, + time_range, + string_predicate, + } } } @@ -279,7 +279,7 @@ impl ExecutionPlan for SegmentScanExec { fn partitions(&self) -> datafusion::error::Result>> { let store = self.store.clone(); - Ok(vec![Arc::new(SegmentPartition{ + Ok(vec![Arc::new(SegmentPartition { store, time_range: self.time_range, string_predicate: self.string_predicate.clone(), @@ -292,11 +292,12 @@ struct SegmentPartition { store: Arc, time_range: (i64, i64), string_predicate: StringPredicate, - } impl Partition for SegmentPartition { - fn execute(&self) -> datafusion::error::Result>> { + fn execute( + &self, + ) -> datafusion::error::Result>> { let combined_results: Vec> = vec![]; let segments = self.store.segments(); @@ -305,7 +306,6 @@ impl Partition for SegmentPartition { let col_name = &self.string_predicate.col_name; let scalar = column::Scalar::String(&self.string_predicate.value); - // Here let _columns = segments.read_filter_eq( self.time_range, @@ -322,12 +322,9 @@ impl Partition for SegmentPartition { // If we were implementing this for real, we would not convert // `columns` into RecordBatches and feed them back out - Ok(Arc::new(Mutex::new(RecordBatchIterator::new( self.store.schema().clone(), combined_results, )))) - - } } From 0132a600b551bbd5821827b7fa8d73d456c355fa Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 25 Aug 2020 11:37:59 +0100 Subject: [PATCH 32/73] feat: add schema wrapper for sort order --- delorean_mem_qe/src/segment.rs | 54 +++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 8dee09d5f4..5e8ff3549b 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use super::column; use super::column::Column; @@ -7,6 +7,45 @@ use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. 
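// NOTE(editor): the Schema wrapper introduced just below enforces two
// invariants on a sort order: no repeated columns, and no more sort columns
// than the schema has fields. A dependency-free analogue of those checks:
fn check_sort_order(num_fields: usize, sort_order: &[usize]) {
    let unique: std::collections::BTreeSet<_> = sort_order.iter().collect();
    assert_eq!(unique.len(), sort_order.len(), "sort order repeats a column");
    assert!(sort_order.len() <= num_fields, "sort order longer than schema");
}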
const THREADS: usize = 16; +#[derive(Debug)] +pub struct Schema { + _ref: SchemaRef, + col_sort_order: Vec, +} + +impl Schema { + pub fn new(schema: SchemaRef) -> Self { + Self { + _ref: schema, + col_sort_order: vec![], + } + } + + pub fn with_sort_order(schema: SchemaRef, sort_order: Vec) -> Self { + let set = sort_order.iter().collect::>(); + assert_eq!(set.len(), sort_order.len()); + assert!(sort_order.len() <= schema.fields().len()); + + Self { + _ref: schema, + col_sort_order: sort_order, + } + } + + pub fn sort_order(&self) -> &[usize] { + self.col_sort_order.as_slice() + } + + pub fn schema_ref(&self) -> SchemaRef { + self._ref.clone() + } + + pub fn cols(&self) -> usize { + let len = &self._ref.fields().len(); + *len + } +} + #[derive(Debug)] pub struct Segment { meta: SegmentMetaData, @@ -17,10 +56,11 @@ pub struct Segment { } impl Segment { - pub fn new(rows: usize, schema: SchemaRef) -> Self { + pub fn new(rows: usize, schema: Schema) -> Self { + let cols = schema.cols(); Self { meta: SegmentMetaData::new(rows, schema), - columns: vec![], + columns: Vec::with_capacity(cols), time_column_idx: 0, } } @@ -709,12 +749,12 @@ impl Segment { } /// Meta data for a segment. This data is mainly used to determine if a segment -/// may contain value for answering a query. +/// may contain a value that can answer a query. #[derive(Debug)] pub struct SegmentMetaData { size: usize, // TODO rows: usize, - schema: SchemaRef, + schema: Schema, column_names: Vec, time_range: (i64, i64), @@ -725,7 +765,7 @@ pub struct SegmentMetaData { } impl SegmentMetaData { - pub fn new(rows: usize, schema: SchemaRef) -> Self { + pub fn new(rows: usize, schema: Schema) -> Self { let mut meta = Self { size: 0, rows, @@ -739,7 +779,7 @@ impl SegmentMetaData { } pub fn schema(&self) -> SchemaRef { - self.schema.clone() + self.schema.schema_ref() } pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { From 577834c90f3734a4b0e2425c2cd1efa564057224 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 12:38:28 +0100 Subject: [PATCH 33/73] test: fix broken tests --- delorean_mem_qe/src/encoding.rs | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 6d9ce4e4f8..7b265facf1 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -622,7 +622,8 @@ mod test { assert_eq!(bm.to_vec(), vec![7, 9, 10, 11, 13]); let bm = col.row_ids_single_cmp_roaring(&20, std::cmp::Ordering::Equal); - assert_eq!(bm.to_vec(), vec![]); + let exp: Vec = Vec::new(); + assert_eq!(bm.to_vec(), exp); } #[test] @@ -713,25 +714,13 @@ mod test { assert_eq!(drle.value(7).unwrap(), "zoo"); assert_eq!(drle.value(8).unwrap(), "zoo"); - let row_ids = drle - .index_row_ids - .get(&Some("hello".to_string())) - .unwrap() - .to_vec(); + let row_ids = drle.index_row_ids.get(&0).unwrap().to_vec(); assert_eq!(row_ids, vec![0, 1, 3, 4, 5]); - let row_ids = drle - .index_row_ids - .get(&Some("world".to_string())) - .unwrap() - .to_vec(); + let row_ids = drle.index_row_ids.get(&1).unwrap().to_vec(); assert_eq!(row_ids, vec![2]); - let row_ids = drle - .index_row_ids - .get(&Some("zoo".to_string())) - .unwrap() - .to_vec(); + let row_ids = drle.index_row_ids.get(&2).unwrap().to_vec(); assert_eq!(row_ids, vec![6, 7, 8]); } From d1f9ca3acf55ca3096958e33a613f70199d43643 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 15:44:18 +0100 Subject: [PATCH 34/73] feat: add support for providing column 
sort --- delorean_mem_qe/src/bin/main.rs | 224 +++++++++++--------------------- delorean_mem_qe/src/segment.rs | 136 ++++++++++++++----- 2 files changed, 177 insertions(+), 183 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d371cf87e1..28e4e7576f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,11 +1,19 @@ -use std::{fs, fs::File, path::PathBuf, rc::Rc, sync::Arc}; +use std::{ + env, + ffi::OsStr, + fs, + fs::File, + path::{Path, PathBuf}, + rc::Rc, + sync::Arc, +}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; -use arrow::{array, array::Array, datatypes}; +use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; use delorean_mem_qe::column::Column; -use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; +use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Schema, Segment}; use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; use parquet::arrow::arrow_reader::ArrowReader; @@ -31,34 +39,21 @@ fn format_size(sz: usize) -> String { fn main() { env_logger::init(); + let args: Vec = env::args().collect(); - //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - - //let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); - - // smaller file to test with - let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000068644-000000002/http_api_requests_total.parquet"); - - let r = File::open(&path).unwrap(); - let file_size = fs::metadata(&path).expect("read metadata").len(); - println!( - "Reading {} ({}) bytes of parquet from {:?}....", - format_size(file_size as usize), - file_size, - path - ); - - //let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000095062-000000006/http_api_requests_total.parquet").unwrap(); - let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); - let mut reader = - parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); - let batch_size = 60000; - let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); - - //let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + let path = &args[1]; + let mut sort_order = vec![]; + if let Some(arg) = args.get(2) { + sort_order = arg.split(',').collect::>(); + println!("sort is {:?}", sort_order); + }; let mut store = Store::default(); - build_store(record_batch_reader, &mut store).unwrap(); + match Path::new(path).extension().and_then(OsStr::to_str) { + Some("arrow") => build_arrow_store(path, &mut store, sort_order).unwrap(), + Some("parquet") => build_parquet_store(path, &mut store, sort_order).unwrap(), + _ => panic!("unsupported file type"), + } println!( "total segments {:?} with total size {} ({})", @@ -76,127 +71,49 @@ fn main() { time_group_single_with_pred(&store); time_group_by_multi_agg_count(&store); time_group_by_multi_agg_sorted_count(&store); - - // time_column_min_time(&store); - // time_column_max_time(&store); - // time_column_first(&store); - // let segments = store.segments(); - // let res = segments.last("host").unwrap(); - // println!("{:?}", res); - - // let segments = segments - // .filter_by_time(1590036110000000, 1590044410000000) - // .filter_by_predicate_eq("env", 
&column::Scalar::String("prod01-eu-central-1")); - // let res = segments.first( - // "env", - // &column::Scalar::String("prod01-eu-central-1"), - // 1590036110000000, - // ); - // println!("{:?}", res); - // let segments = segments.filter_by_time(1590036110000000, 1590044410000000); - // println!("{:?}", segments.last("host")); - // println!("{:?}", segments.segments().last().unwrap().row(14899)); - - // time_row_by_last_ts(&store); - - // let rows = segments - // .segments() - // .last() - // .unwrap() - // .filter_by_predicate_eq( - // Some((1590040770000000, 1590040790000000)), - // vec![ - // ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - // ("method", Some(&column::Scalar::String("GET"))), - // ( - // "host", - // Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - // ), - // ], - // ) - // .unwrap(); - - // for row_id in rows.iter() { - // println!( - // "{:?} - {:?}", - // row_id, - // segments.segments().last().unwrap().row(row_id as usize) - // ); - // } - // println!("{:?}", rows.cardinality()); - - // time_row_by_preds(&store); - - // let segments = store.segments(); - // let columns = segments.read_filter_eq( - // (1590036110000000, 1590040770000000), - // &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], - // vec![ - // "env".to_string(), - // "method".to_string(), - // "host".to_string(), - // "counter".to_string(), - // "time".to_string(), - // ], - // ); - - // for (k, v) in columns { - // println!("COLUMN {:?}", k); - // // println!("ROWS ({:?}) {:?}", v.len(), 0); - // println!("ROWS ({}) {:?}", v, v.len()); - // } - - // loop { - // let now = std::time::Instant::now(); - // let segments = store.segments(); - // let groups = segments.read_group_eq( - // (0, 1590044410000000), - // &[], - // vec!["env".to_string(), "role".to_string()], - // vec![ - // ("counter".to_string(), Aggregate::Sum), - // // ("counter".to_string(), Aggregate::Count), - // ], - // ); - // println!("{:?} {:?}", groups, now.elapsed()); - // } - - // loop { - // let mut total_count = 0.0; - // let now = std::time::Instant::now(); - // for segment in segments.segments() { - // let (min, max) = segment.time_range(); - // let time_ids = segment - // .filter_by_predicates_eq((min, max), &vec![]) - // .unwrap(); - - // let group_ids = segment.group_by_column_ids("env").unwrap(); - // for (col_values, row_ids) in group_ids { - // // filter ids by time - // let mut result = row_ids.and(&time_ids); - // // let - // // println!( - // // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", - // // min, - // // max, - // // col_values, - // // segment.sum_column(&"counter", &result), - // // result.cardinality(), - // // ); - // if let column::Scalar::Float(x) = - // segment.sum_column(&"counter", &mut result).unwrap() - // { - // total_count += x; - // } - // } - // } - // println!("Done ({:?}) in {:?}", total_count, now.elapsed()); - // } } -fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result<(), Error> { +fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { + let path = PathBuf::from(path); + let r = File::open(&path).unwrap(); + let file_size = fs::metadata(&path).expect("read metadata").len(); + println!( + "Reading {} ({}) bytes of Parquet from {:?}....", + format_size(file_size as usize), + file_size, + path + ); + + let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); + let mut reader = + 
parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let batch_size = 60000; + let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); + build_store(record_batch_reader, store, sort_order) +} + +fn build_arrow_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let file_size = fs::metadata(&path).expect("read metadata").len(); + println!( + "Reading {} ({}) bytes of Arrow from {:?}....", + format_size(file_size as usize), + file_size, + path + ); + + let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + build_store(reader, store, sort_order) +} + +fn build_store( + mut reader: impl RecordBatchReader, + store: &mut Store, + sort_order: Vec<&str>, +) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); + // let mut i = 0; loop { let rb = reader.next_batch(); match rb { @@ -206,8 +123,15 @@ fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result< // i += 1; // continue; // } + let schema = Schema::with_sort_order( + rb.schema(), + sort_order.iter().map(|s| s.to_string()).collect(), + ); + total_rows_read += rb.num_rows(); - let segment = convert_record_batch(rb)?; + let mut segment = Segment::new(rb.num_rows(), schema); + convert_record_batch(rb, &mut segment)?; + store.add_segment(segment); } Ok(None) => { @@ -223,9 +147,7 @@ fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result< } } -fn convert_record_batch(rb: RecordBatch) -> Result { - let mut segment = Segment::new(rb.num_rows(), rb.schema().clone()); - +fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Error> { println!( "Loading record batch: cols {:?} rows {:?}", rb.num_columns(), @@ -333,7 +255,7 @@ fn convert_record_batch(rb: RecordBatch) -> Result { ref d @ _ => panic!("unsupported datatype: {:?}", d), } } - Ok(segment) + Ok(()) } // @@ -582,7 +504,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 10; + let repeat = 10000; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 5e8ff3549b..d1a80ae389 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -10,7 +10,7 @@ const THREADS: usize = 16; #[derive(Debug)] pub struct Schema { _ref: SchemaRef, - col_sort_order: Vec, + col_sort_order: Vec, } impl Schema { @@ -21,7 +21,7 @@ impl Schema { } } - pub fn with_sort_order(schema: SchemaRef, sort_order: Vec) -> Self { + pub fn with_sort_order(schema: SchemaRef, sort_order: Vec) -> Self { let set = sort_order.iter().collect::>(); assert_eq!(set.len(), sort_order.len()); assert!(sort_order.len() <= schema.fields().len()); @@ -32,7 +32,7 @@ impl Schema { } } - pub fn sort_order(&self) -> &[usize] { + pub fn sort_order(&self) -> &[String] { self.col_sort_order.as_slice() } @@ -65,30 +65,6 @@ impl Segment { } } - pub fn num_rows(&self) -> usize { - self.meta.rows - } - - pub fn column_names(&self) -> &[String] { - &self.meta.column_names - } - - /// column returns the column with name - pub fn column(&self, name: &str) -> Option<&column::Column> { - if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { - return self.columns.get(*id); 
- } - None - } - - pub fn time_range(&self) -> (i64, i64) { - self.meta.time_range - } - - pub fn schema(&self) -> SchemaRef { - self.meta.schema() - } - pub fn add_column(&mut self, name: &str, c: column::Column) { assert_eq!( self.meta.rows, @@ -115,6 +91,30 @@ impl Segment { self.columns.push(c); } + pub fn num_rows(&self) -> usize { + self.meta.rows + } + + pub fn column_names(&self) -> &[String] { + &self.meta.column_names + } + + /// column returns the column with name + pub fn column(&self, name: &str) -> Option<&column::Column> { + if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { + return self.columns.get(*id); + } + None + } + + pub fn time_range(&self) -> (i64, i64) { + self.meta.time_range + } + + pub fn schema(&self) -> SchemaRef { + self.meta.schema() + } + // TODO - iterator.... /// Returns the size of the segment in bytes. pub fn size(&self) -> usize { @@ -193,6 +193,34 @@ impl Segment { None } + // Determines if a segment is already sorted by a group key. Only supports + // ascending ordering at the moment. If this function returns true then + // the columns being grouped on are naturally sorted and for basic + // aggregations should not need to be sorted or hashed. + fn group_key_sorted(&self, group_cols: &[String]) -> bool { + let sorted_by_cols = self.meta.schema.sort_order(); + if group_cols.len() > sorted_by_cols.len() { + // grouping by more columns than there are defined sorts. + return false; + } + + let mut covered = 0; + 'outer: for sc in sorted_by_cols { + // find col in group key - doesn't matter what location in group key + for gc in group_cols { + if sc == gc { + covered += 1; + continue 'outer; + } + } + + // didn't find this sorted column in group key. That's okay if there + // are no more columns being grouped + return covered == group_cols.len(); + } + true + } + pub fn aggregate_by_group_with_hash( &self, time_range: (i64, i64), @@ -419,8 +447,6 @@ impl Segment { group_column_encoded_values.push(None); } } - let group_col_sort_order = &(0..group_columns.len()).collect::>(); - // println!("grouped columns {:?}", group_column_encoded_values); // TODO(edd): we could do this with an iterator I expect. // @@ -460,9 +486,14 @@ impl Segment { } } - // now sort on the first grouping columns. Right now the order doesn't matter... let now = std::time::Instant::now(); - super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + if self.group_key_sorted(group_columns) { + log::debug!("segment already sorted by group key {:?}", group_columns); + } else { + // now sort on the first grouping columns. Right now the order doesn't matter... 
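// NOTE(editor): this branch is the slow path. When group_key_sorted()
// returns true above, the segment is already ordered by every grouping
// column, so the O(n log n) sort below is skipped entirely and groups can
// be consumed as contiguous runs of rows.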
+ let group_col_sort_order = &(0..group_columns.len()).collect::>(); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + } log::debug!("time checking sort {:?}", now.elapsed()); let mut group_itrs = all_columns @@ -1191,4 +1222,45 @@ pub enum GroupingStrategy { } #[cfg(test)] -mod test {} +mod test { + + use arrow::datatypes::*; + + #[test] + fn segment_group_key_sorted() { + let schema = super::Schema::with_sort_order( + arrow::datatypes::SchemaRef::new(Schema::new(vec![ + Field::new("env", DataType::Utf8, false), + Field::new("role", DataType::Utf8, false), + Field::new("path", DataType::Utf8, false), + Field::new("time", DataType::Int64, false), + ])), + vec![ + "env".to_string(), + "role".to_string(), + "path".to_string(), + "time".to_string(), + ], + ); + let s = super::Segment::new(0, schema); + + let cases = vec![ + (vec!["env"], true), + (vec!["role"], false), + (vec!["foo"], false), + (vec![], true), + (vec!["env", "role"], true), + (vec!["env", "role", "foo"], false), // group key contains non-sorted col + (vec!["env", "role", "path", "time"], true), + (vec!["env", "role", "path", "time", "foo"], false), // group key contains non-sorted col + (vec!["env", "path", "role"], true), // order of columns in group key does not matter + ]; + + for (group_key, expected) in cases { + assert_eq!( + s.group_key_sorted(&group_key.iter().map(|x| x.to_string()).collect::>()), + expected + ); + } + } +} From f588b9ff6171c62260081a95851bc10f4943ddf0 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 21:51:46 +0100 Subject: [PATCH 35/73] refactor: move AggregateType and push aggregates down --- delorean_mem_qe/src/bin/main.rs | 46 +++++++++++++++++++++------------ delorean_mem_qe/src/column.rs | 39 ++++++++++++++++++++++++++-- delorean_mem_qe/src/encoding.rs | 12 +++++++++ delorean_mem_qe/src/segment.rs | 36 ++++++++++++++------------ 4 files changed, 97 insertions(+), 36 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 28e4e7576f..a58ce879d2 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -12,8 +12,8 @@ use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; -use delorean_mem_qe::column::Column; -use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Schema, Segment}; +use delorean_mem_qe::column::{AggregateType, Column}; +use delorean_mem_qe::segment::{GroupingStrategy, Schema, Segment}; use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; use parquet::arrow::arrow_reader::ArrowReader; @@ -63,13 +63,13 @@ fn main() { ); let store = Arc::new(store); - time_select_with_pred(&store); - time_datafusion_select_with_pred(store.clone()); - time_first_host(&store); - time_sum_range(&store); - time_count_range(&store); - time_group_single_with_pred(&store); - time_group_by_multi_agg_count(&store); + // time_select_with_pred(&store); + // time_datafusion_select_with_pred(store.clone()); + // time_first_host(&store); + // time_sum_range(&store); + // time_count_range(&store); + // time_group_single_with_pred(&store); + // time_group_by_multi_agg_count(&store); time_group_by_multi_agg_sorted_count(&store); } @@ -113,7 +113,7 @@ fn build_store( ) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); - // let mut i = 0; + let mut i = 0; loop { let rb = reader.next_batch(); match rb { @@ -441,7 +441,7 @@ fn time_group_single_with_pred(store: &Store) { 
(1588834080000000, 1590044410000000), &[], &"env".to_string(), - &vec![("counter".to_string(), Aggregate::Count)], + &vec![("counter".to_string(), AggregateType::Count)], ); track += results.len(); } @@ -457,6 +457,12 @@ fn time_group_single_with_pred(store: &Store) { ); } +// +// SELECT COUNT(counter) +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// GROUP BY "status", "method" +// fn time_group_by_multi_agg_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, @@ -477,7 +483,7 @@ fn time_group_by_multi_agg_count(store: &Store) { (1589000000000001, 1590044410000000), &[], vec!["status".to_string(), "method".to_string()], - vec![("counter".to_string(), Aggregate::Count)], + vec![("counter".to_string(), AggregateType::Count)], strat, ); @@ -495,16 +501,22 @@ fn time_group_by_multi_agg_count(store: &Store) { } } +// +// SELECT COUNT(counter) +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// GROUP BY "env", "role" +// fn time_group_by_multi_agg_sorted_count(store: &Store) { let strats = vec![ - GroupingStrategy::HashGroup, - GroupingStrategy::HashGroupConcurrent, + // GroupingStrategy::HashGroup, + // GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - GroupingStrategy::SortGroupConcurrent, + // GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { - let repeat = 10000; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -515,7 +527,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { (1589000000000001, 1590044410000000), &[], vec!["env".to_string(), "role".to_string()], - vec![("counter".to_string(), Aggregate::Count)], + vec![("counter".to_string(), AggregateType::Count)], strat, ); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index a2ac7e83c2..3ce8ee0357 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -103,6 +103,12 @@ pub enum Aggregate<'a> { Sum(Scalar<'a>), } +#[derive(Debug, Clone)] +pub enum AggregateType { + Count, + Sum, +} + impl<'a> Aggregate<'a> { pub fn update_with(&mut self, other: Scalar<'a>) { match self { @@ -319,8 +325,8 @@ impl Column { } } - /// Materialise all of the decoded values matching the provided logical - /// row ids. + /// Materialise the decoded value matching the provided logical + /// row id. 
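A note on the refactor this patch starts: `AggregateType` names the operation a caller requests, while the existing `Aggregate` carries an accumulated result. A minimal sketch of how the two halves interact, using a bare `f64` in place of the real `Scalar` values and lifetimes:

```rust
#[derive(Debug, Clone)]
enum AggregateType {
    Count,
    Sum,
}

#[derive(Debug)]
enum Aggregate {
    Count(u64),
    Sum(f64),
}

impl Aggregate {
    // seed an empty accumulator for a requested operation
    fn new(t: &AggregateType) -> Self {
        match t {
            AggregateType::Count => Aggregate::Count(0),
            AggregateType::Sum => Aggregate::Sum(0.0),
        }
    }

    // fold one row's value into the accumulator
    fn update(&mut self, v: f64) {
        match self {
            Aggregate::Count(c) => *c += 1,
            Aggregate::Sum(s) => *s += v,
        }
    }
}

fn main() {
    let mut agg = Aggregate::new(&AggregateType::Sum);
    for v in [1.5, 2.5, 3.0] {
        agg.update(v);
    }
    println!("{:?}", agg); // Sum(7.0)
}
```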
pub fn value(&self, row_id: usize) -> Option { match self { Column::String(c) => { @@ -726,6 +732,27 @@ impl Column { } } + pub fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + match self { + Column::String(_) => unimplemented!("not implemented"), + Column::Float(c) => match agg_type { + AggregateType::Count => { + Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id) as u64) + } + AggregateType::Sum => { + Aggregate::Sum(Scalar::Float(c.sum_by_id_range(from_row_id, to_row_id))) + } + }, + + Column::Integer(_) => unimplemented!("not implemented"), + } + } + pub fn group_by_ids(&self) -> &std::collections::BTreeMap { match self { Column::String(c) => c.data.group_row_ids(), @@ -977,6 +1004,14 @@ impl Float { pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { self.data.sum_by_ids(row_ids) } + + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> f64 { + self.data.sum_by_id_range(from_row_id, to_row_id) + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.data.count_by_id_range(from_row_id, to_row_id) + } } impl From<&[f64]> for Float { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 7b265facf1..8e9e282442 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -156,6 +156,18 @@ where bm } + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { + let mut res = T::default(); + for v in self.values[from_row_id..to_row_id].iter() { + res += *v; + } + res + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + to_row_id - from_row_id + } + // TODO(edd): make faster pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { let mut res = T::default(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index d1a80ae389..f3c6288df6 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use super::column; -use super::column::Column; +use super::column::{AggregateType, Column}; use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. @@ -226,7 +226,7 @@ impl Segment { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], - aggregates: &[(String, Aggregate)], + aggregates: &[(String, AggregateType)], ) -> BTreeMap, Vec<(String, Option)>> { // println!("working segment {:?}", time_range); // Build a hash table - essentially, scan columns for matching row ids, @@ -323,7 +323,7 @@ impl Segment { let mut hash_table: HashMap< Vec>, - Vec<(&String, &Aggregate, Option)>, + Vec<(&String, &AggregateType, Option)>, > = HashMap::with_capacity(30000); let mut aggregate_row: Vec<(&str, Option)> = @@ -361,7 +361,7 @@ impl Segment { // a place-holder for each aggregate being executed. 
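The loop that follows leans on the map's `entry` API so that each distinct group key initialises its aggregate place-holders exactly once. A dependency-free sketch of that shape, with a hypothetical `hash_group` helper, encoded group keys as `u32`s, and a fixed count-and-sum pair standing in for the per-query aggregate vector:

```rust
use std::collections::HashMap;

fn hash_group(rows: &[(Vec<u32>, f64)]) -> HashMap<Vec<u32>, (u64, f64)> {
    let mut table: HashMap<Vec<u32>, (u64, f64)> = HashMap::new();
    for (group_key, value) in rows {
        // lazily create the place-holder aggregates for a new group key
        let (count, sum) = table.entry(group_key.clone()).or_insert((0, 0.0));
        *count += 1;
        *sum += *value;
    }
    table
}

fn main() {
    let rows = vec![(vec![0, 1], 1.0), (vec![0, 1], 2.0), (vec![2, 1], 5.0)];
    let table = hash_group(&rows);
    assert_eq!(table[&vec![0, 1]], (2, 3.0));
    assert_eq!(table[&vec![2, 1]], (1, 5.0));
}
```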
let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { // TODO COULD BE MAP/COLLECT - let mut agg_results: Vec<(&String, &Aggregate, Option)> = + let mut agg_results: Vec<(&String, &AggregateType, Option)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option @@ -396,8 +396,10 @@ impl Segment { }, None => { *cum_agg_value = match agg_type { - Aggregate::Count => Some(column::Aggregate::Count(0)), - Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), + AggregateType::Count => Some(column::Aggregate::Count(0)), + AggregateType::Sum => { + Some(column::Aggregate::Sum(row_value.clone())) + } } } } @@ -414,7 +416,7 @@ impl Segment { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], - aggregates: &[(String, Aggregate)], + aggregates: &[(String, AggregateType)], ) -> BTreeMap, Vec<(String, column::Aggregate)>> { // filter on predicates and time let filtered_row_ids: croaring::Bitmap; @@ -536,8 +538,8 @@ impl Segment { .zip(last_agg_row.iter()) .map(|((col_name, agg_type), curr_agg)| { let agg = match agg_type { - Aggregate::Count => column::Aggregate::Count(1), - Aggregate::Sum => column::Aggregate::Sum(curr_agg.clone()), + AggregateType::Count => column::Aggregate::Count(1), + AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), }; (col_name.clone(), agg) }) @@ -717,8 +719,8 @@ impl Segment { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_column: &String, - aggregates: &Vec<(String, Aggregate)>, - ) -> BTreeMap> { + aggregates: &Vec<(String, column::AggregateType)>, + ) -> BTreeMap> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -734,12 +736,12 @@ impl Segment { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { // First calculate all of the aggregates for this grouped value - let mut aggs: Vec<((String, Aggregate), column::Aggregate)> = + let mut aggs: Vec<((String, AggregateType), column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg) in aggregates { match &agg { - Aggregate::Sum => { + AggregateType::Sum => { aggs.push(( (col_name.to_string(), agg.clone()), column::Aggregate::Sum( @@ -747,7 +749,7 @@ impl Segment { ), // assuming no non-null group keys )); } - Aggregate::Count => { + AggregateType::Count => { aggs.push(( (col_name.to_string(), agg.clone()), column::Aggregate::Count( @@ -898,7 +900,7 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, - aggregates: Vec<(String, Aggregate)>, + aggregates: Vec<(String, AggregateType)>, strategy: &GroupingStrategy, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; @@ -957,7 +959,7 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, - aggregates: Vec<(String, Aggregate)>, + aggregates: Vec<(String, AggregateType)>, concurrent: bool, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { if concurrent { @@ -1034,7 +1036,7 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, - aggregates: Vec<(String, Aggregate)>, + aggregates: Vec<(String, AggregateType)>, concurrent: bool, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { if concurrent { From 
ab866073e30f7a9ecf046f7c06b336fb61576b48 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 22:44:45 +0100 Subject: [PATCH 36/73] perf: faster group by with sorted cols --- delorean_mem_qe/src/bin/main.rs | 6 +- delorean_mem_qe/src/segment.rs | 351 +++++++++++++++++++++++++++++++- 2 files changed, 349 insertions(+), 8 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index a58ce879d2..f89d7af645 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -509,10 +509,10 @@ fn time_group_by_multi_agg_count(store: &Store) { // fn time_group_by_multi_agg_sorted_count(store: &Store) { let strats = vec![ - // GroupingStrategy::HashGroup, - // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - // GroupingStrategy::SortGroupConcurrent, + GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f3c6288df6..2e03f9d1d0 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -410,13 +410,37 @@ impl Segment { log::debug!("{:?}", hash_table); BTreeMap::new() } - pub fn aggregate_by_group_with_sort( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + if self.group_key_sorted(group_columns) { + log::info!("group key is already sorted {:?}", group_columns); + self.aggregate_by_group_with_sort_sorted( + time_range, + predicates, + group_columns, + aggregates, + ) + } else { + self.aggregate_by_group_with_sort_unsorted( + time_range, + predicates, + group_columns, + aggregates, + ) + } + } + + fn aggregate_by_group_with_sort_unsorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], ) -> BTreeMap, Vec<(String, column::Aggregate)>> { // filter on predicates and time let filtered_row_ids: croaring::Bitmap; @@ -490,7 +514,7 @@ impl Segment { let now = std::time::Instant::now(); if self.group_key_sorted(group_columns) { - log::debug!("segment already sorted by group key {:?}", group_columns); + panic!("This shouldn't be called!!!"); } else { // now sort on the first grouping columns. Right now the order doesn't matter... let group_col_sort_order = &(0..group_columns.len()).collect::>(); @@ -595,6 +619,281 @@ impl Segment { BTreeMap::new() } + // this method assumes that the segment's columns are sorted such that a + // sort of columns is not required. + fn aggregate_by_group_with_sort_sorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. 
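One mechanical detail worth calling out before the materialisation below: the bitmap yields `u32` row ids, which are widened to `usize` so they can index the column vectors. A dependency-free sketch of that gather step (the real code reads the ids from a `croaring::Bitmap`):

```rust
fn widen(row_ids: &[u32]) -> Vec<usize> {
    row_ids.iter().map(|&v| v as usize).collect()
}

fn main() {
    // rows that survived the time-range and predicate filter
    let matching: Vec<u32> = vec![0, 3, 4];
    let column = ["a", "b", "c", "d", "e"];

    let ids = widen(&matching);
    let materialised: Vec<&str> = ids.iter().map(|&i| column[i]).collect();
    assert_eq!(materialised, vec!["a", "d", "e"]);
}
```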
+ let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values = column.encoded_values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + + group_column_encoded_values.push(encoded_values); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + let mut new_agg_cols = Vec::with_capacity(aggregates.len()); + for (column_name, agg_type) in aggregates { + new_agg_cols.push((column_name, agg_type, self.column(&column_name))); + } + + let mut group_itrs = group_column_encoded_values + .iter() + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } + }) + .collect::>(); + + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect::>(); + + let mut curr_group_row = last_group_row.clone(); + + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + + let mut group_key_start_row_id = 0; + let mut group_size = 0; + + while processed_rows < *total_rows { + // update next group key. + let mut group_key_changed = false; + for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { + let next_v = itr.next().unwrap(); + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. + if group_key_changed { + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset counters tracking group key row range + group_key_start_row_id = processed_rows as usize; // TODO(edd) - could be an off-by-one? + group_size = 0; + } + + group_size += 1; + processed_rows += 1; + } + + // Emit final row + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + log::info!("({:?} rows processed) {:?}", processed_rows, results); + // results + BTreeMap::new() + } + + // this method assumes that the segment's columns are sorted such that a + // sort of columns is not required. 
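Before the windowed variant, it is worth restating the streaming idea in miniature: when rows arrive sorted by the group key, each group occupies one contiguous run, so the scan only has to remember where the current run began and flush an aggregate when the key changes. A self-contained sketch with a single `u32` key column and a sum:

```rust
fn stream_group_sum(keys: &[u32], values: &[f64]) -> Vec<(u32, f64)> {
    let mut results = Vec::new();
    if keys.is_empty() {
        return results;
    }
    let mut run_start = 0;
    for i in 1..keys.len() {
        if keys[i] != keys[run_start] {
            // key changed: aggregate the completed run [run_start, i)
            results.push((keys[run_start], values[run_start..i].iter().sum()));
            run_start = i;
        }
    }
    // emit the final run
    results.push((keys[run_start], values[run_start..].iter().sum()));
    results
}

fn main() {
    let keys = [1, 1, 1, 4, 4, 9];
    let vals = [1.0, 2.0, 3.0, 10.0, 10.0, 7.0];
    assert_eq!(
        stream_group_sum(&keys, &vals),
        vec![(1, 6.0), (4, 20.0), (9, 7.0)]
    );
}
```

This is the O(n) single pass the sorted branch buys; the hash path pays a map lookup per row instead.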
+ fn window_aggregate_with_sort_sorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + window: i64, + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values = column.encoded_values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + + group_column_encoded_values.push(encoded_values); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + let mut new_agg_cols = Vec::with_capacity(aggregates.len()); + for (column_name, agg_type) in aggregates { + new_agg_cols.push((column_name, agg_type, self.column(&column_name))); + } + + let mut group_itrs = group_column_encoded_values + .iter() + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } + }) + .collect::>(); + + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect::>(); + + let mut curr_group_row = last_group_row.clone(); + + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + + let mut group_key_start_row_id = 0; + let mut group_size = 0; + + while processed_rows < *total_rows { + // update next group key. + let mut group_key_changed = false; + for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { + let next_v = itr.next().unwrap(); + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. + if group_key_changed { + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset counters tracking group key row range + group_key_start_row_id = processed_rows as usize; // TODO(edd) - could be an off-by-one? 
+ group_size = 0; + } + + group_size += 1; + processed_rows += 1; + } + + // Emit final row + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + log::info!("({:?} rows processed) {:?}", processed_rows, results); + // results + BTreeMap::new() + } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -1058,7 +1357,7 @@ impl<'a> Segments<'a> { &aggregates, ); log::info!( - "processed segment {:?} using multi-threaded hash-grouping in {:?}", + "processed segment {:?} using multi-threaded sort in {:?}", segment.time_range(), now.elapsed() ) @@ -1078,7 +1377,7 @@ impl<'a> Segments<'a> { &aggregates_arc.clone(), ); log::info!( - "processed segment {:?} using multi-threaded hash-grouping in {:?}", + "processed segment {:?} using multi-threaded sort in {:?}", segment.time_range(), now.elapsed() ) @@ -1099,7 +1398,7 @@ impl<'a> Segments<'a> { &aggregates, ); log::info!( - "processed segment {:?} using single-threaded hash-grouping in {:?}", + "processed segment {:?} using single-threaded sort in {:?}", segment.time_range(), now.elapsed() ) @@ -1129,6 +1428,48 @@ impl<'a> Segments<'a> { min_min } + pub fn window_agg_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, AggregateType)>, + strategy: &GroupingStrategy, + window: i64, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + match strategy { + GroupingStrategy::HashGroup => { + panic!("not yet"); + } + GroupingStrategy::HashGroupConcurrent => { + panic!("not yet"); + } + GroupingStrategy::SortGroup => { + return self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + false, + ) + } + GroupingStrategy::SortGroupConcurrent => { + panic!("not yet"); + } + } + + // TODO(edd): merge results - not expensive really... + // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + // BTreeMap::new(); + + // cum_results + } + /// Returns the maximum value for a column in a set of segments. 
pub fn column_max(&self, column_name: &str) -> Option { if self.segments.is_empty() { From 4a153f5f7d5dea62e17bc84708a7d65e39960f95 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 13:59:24 +0100 Subject: [PATCH 37/73] feat: generalise windowing to sorted/unsorted groups --- delorean_mem_qe/src/bin/main.rs | 31 +++- delorean_mem_qe/src/encoding.rs | 3 +- delorean_mem_qe/src/segment.rs | 305 ++++++++++++++++++++++++++++---- 3 files changed, 305 insertions(+), 34 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index f89d7af645..e8bfc3dbd7 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -70,7 +70,8 @@ fn main() { // time_count_range(&store); // time_group_single_with_pred(&store); // time_group_by_multi_agg_count(&store); - time_group_by_multi_agg_sorted_count(&store); + // time_group_by_multi_agg_sorted_count(&store); + time_window_agg_sorted_count(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -544,3 +545,31 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ); } } + +fn time_window_agg_sorted_count(store: &Store) { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.window_agg_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["env".to_string(), "role".to_string()], + vec![("counter".to_string(), AggregateType::Count)], + 60000000 * 10, // 10 minutes + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_window_agg_sorted_count ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 8e9e282442..d6a865a5f1 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -51,13 +51,14 @@ where out.push(self.values[chunks[2]]); out.push(self.values[chunks[1]]); out.push(self.values[chunks[0]]); - // out.push(self.values[row_id]); } let rem = row_ids.len() % 4; for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { out.push(self.values[i]); } + + assert_eq!(out.len(), row_ids.len()); out } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 2e03f9d1d0..e99c7e0181 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -410,6 +410,7 @@ impl Segment { log::debug!("{:?}", hash_table); BTreeMap::new() } + pub fn aggregate_by_group_with_sort( &self, time_range: (i64, i64), @@ -756,6 +757,35 @@ impl Segment { BTreeMap::new() } + pub fn window_aggregate_with_sort( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + window: i64, + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + if self.group_key_sorted(group_columns) { + log::info!("group key is already sorted {:?}", group_columns); + self.window_aggregate_with_sort_sorted( + time_range, + predicates, + group_columns, + aggregates, + window, + ) + } else { + log::info!("group key needs sorting {:?}", group_columns); + self.window_aggregate_with_sort_unsorted( + time_range, + predicates, + group_columns, + aggregates, + window, + ) + } + } + // this method assumes that the segment's columns are sorted such that a // sort of columns 
is not required. fn window_aggregate_with_sort_sorted( @@ -809,16 +839,24 @@ impl Segment { if let column::Vector::Integer(v) = vector { v.iter() } else { - panic!("don't support grouping on non-encoded values"); + panic!("don't support grouping on non-encoded values or time"); } }) .collect::>(); // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. + let group_itrs_len = &group_itrs.len(); let mut last_group_row = group_itrs .iter_mut() - .map(|itr| itr.next().unwrap()) + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) .collect::>(); let mut curr_group_row = last_group_row.clone(); @@ -832,9 +870,18 @@ impl Segment { while processed_rows < *total_rows { // update next group key. let mut group_key_changed = false; - for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { - let next_v = itr.next().unwrap(); - if curr_v != &next_v { + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_itrs_len - 1 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; + if *curr_v != next_v { group_key_changed = true; } *curr_v = next_v; @@ -886,7 +933,7 @@ impl Segment { } } - let key = last_group_row.clone(); + let key = last_group_row; results.insert(key, group_key_aggregates); log::info!("({:?} rows processed) {:?}", processed_rows, results); @@ -894,6 +941,206 @@ impl Segment { BTreeMap::new() } + fn window_aggregate_with_sort_unsorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + window: i64, + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values = column.encoded_values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + group_column_encoded_values.push(Some(encoded_values)); + } else { + group_column_encoded_values.push(None); + } + } + + // TODO(edd): we could do this with an iterator I expect. + // + // materialise all decoded values for the rows in the columns we are + // aggregating on. 
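The `/ window * window` expression applied to the time column above is the entire windowing mechanism: integer division truncates a timestamp down to the start of its bucket, so the bucketed time behaves like one more encoded group column. A tiny check of that arithmetic (the benchmarks in main.rs pass microsecond timestamps and a 10 minute window):

```rust
fn window_start(ts: i64, window: i64) -> i64 {
    // note: `/` truncates toward zero, so this is only a floor for
    // non-negative timestamps
    ts / window * window
}

fn main() {
    let window = 60_000_000 * 10; // 10 minutes in microseconds
    assert_eq!(window_start(1_589_000_000_000_001, window), 1_588_999_800_000_000);
    // two timestamps inside the same 10 minute window share a bucket
    assert_eq!(
        window_start(1_589_000_000_000_001, window),
        window_start(1_589_000_399_999_999, window)
    );
}
```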
+ let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); + for (column_name, _) in aggregates { + if let Some(column) = self.column(&column_name) { + let decoded_values = column.values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + decoded_values.len() + ); + aggregate_column_decoded_values.push((column_name, Some(decoded_values))); + } else { + aggregate_column_decoded_values.push((column_name, None)); + } + } + + let mut all_columns = Vec::with_capacity( + group_column_encoded_values.len() + aggregate_column_decoded_values.len(), + ); + + for gc in group_column_encoded_values { + if let Some(p) = gc { + all_columns.push(p); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + for ac in aggregate_column_decoded_values { + if let (_, Some(p)) = ac { + all_columns.push(p); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + let now = std::time::Instant::now(); + if self.group_key_sorted(&group_columns) { + panic!("This shouldn't be called!!!"); + } else { + // now sort on the first grouping columns. Right now the order doesn't matter... + let group_col_sort_order = &(0..group_columns.len()).collect::>(); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + } + log::debug!("time checking sort {:?}", now.elapsed()); + + let mut group_itrs = all_columns + .iter() + .take(group_columns.len()) // only use grouping columns + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } + }) + .collect::>(); + + let mut aggregate_itrs = all_columns + .iter() + .skip(group_columns.len()) // only use grouping columns + .map(|v| column::VectorIterator::new(v)) + .collect::>(); + + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .enumerate() + .map(|(i, itr)| { + if i == group_columns.len() - 1 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) + .collect::>(); + + let mut curr_group_row = last_group_row.clone(); + + // this tracks the last row for each column we are aggregating. + let last_agg_row: Vec = aggregate_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect(); + + // this keeps the current cumulative aggregates for the columns we + // are aggregating. + let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates + .iter() + .zip(last_agg_row.iter()) + .map(|((col_name, agg_type), curr_agg)| { + let agg = match agg_type { + AggregateType::Count => column::Aggregate::Count(1), + AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), + }; + (col_name.clone(), agg) + }) + .collect(); + + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + while processed_rows < *total_rows { + // update next group key. + let mut group_key_changed = false; + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_columns.len() - 1 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. 
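The change detection that drives the emit below runs one iterator per group column in lock step and compares the freshly assembled key row against the previous one. A stripped-down sketch, with plain `i64` vectors in place of the encoded column iterators:

```rust
fn group_changed(last: &[i64], curr: &[i64]) -> bool {
    last.iter().zip(curr.iter()).any(|(a, b)| a != b)
}

fn main() {
    // two group columns, three rows: (1,5), (1,5), (2,5)
    let cols: Vec<Vec<i64>> = vec![vec![1, 1, 2], vec![5, 5, 5]];
    let mut iters: Vec<_> = cols.iter().map(|c| c.iter()).collect();

    let mut last: Vec<i64> = iters.iter_mut().map(|it| *it.next().unwrap()).collect();
    for _ in 1..3 {
        let curr: Vec<i64> = iters.iter_mut().map(|it| *it.next().unwrap()).collect();
        if group_changed(&last, &curr) {
            // fires once, at the (2, 5) row
            println!("group boundary before {:?}", curr);
        }
        last = curr;
    }
}
```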
+ if group_key_changed { + let key = last_group_row.clone(); + results.insert(key, cum_aggregates.clone()); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset cumulative aggregates + for (_, agg) in cum_aggregates.iter_mut() { + match agg { + column::Aggregate::Count(c) => { + *c = 0; + } + column::Aggregate::Sum(s) => s.reset(), + } + } + } + + // update aggregates + for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { + let (_, curr_agg) = bind.0; + let next_value = bind.1.next().unwrap(); + curr_agg.update_with(next_value); + } + + processed_rows += 1; + } + + // Emit final row + results.insert(last_group_row, cum_aggregates); + + log::info!("({:?} rows processed) {:?}", processed_rows, results); + // results + BTreeMap::new() + } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -1434,7 +1681,6 @@ impl<'a> Segments<'a> { predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, - strategy: &GroupingStrategy, window: i64, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; @@ -1442,32 +1688,26 @@ impl<'a> Segments<'a> { panic!("max <= min"); } - match strategy { - GroupingStrategy::HashGroup => { - panic!("not yet"); - } - GroupingStrategy::HashGroupConcurrent => { - panic!("not yet"); - } - GroupingStrategy::SortGroup => { - return self.read_group_eq_sort( - time_range, - predicates, - group_columns, - aggregates, - false, - ) - } - GroupingStrategy::SortGroupConcurrent => { - panic!("not yet"); - } + // add time column to the group key + let mut group_columns = group_columns.clone(); + group_columns.push("time".to_string()); + + for segment in &self.segments { + let now = std::time::Instant::now(); + segment.window_aggregate_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using windowed single-threaded sort in {:?}", + segment.time_range(), + now.elapsed() + ) } - - // TODO(edd): merge results - not expensive really... - // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - // BTreeMap::new(); - - // cum_results + BTreeMap::new() } /// Returns the maximum value for a column in a set of segments. 
@@ -1594,6 +1834,7 @@ mod test { (vec![], true), (vec!["env", "role"], true), (vec!["env", "role", "foo"], false), // group key contains non-sorted col + (vec!["env", "role", "time"], false), // time may be out of order due to path column (vec!["env", "role", "path", "time"], true), (vec!["env", "role", "path", "time", "foo"], false), // group key contains non-sorted col (vec!["env", "path", "role"], true), // order of columns in group key does not matter From ee46c194c85ce4d773107e39ffc864177f90279f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 14:57:27 +0100 Subject: [PATCH 38/73] refactor: integrate windowing into grouping --- delorean_mem_qe/src/bin/main.rs | 65 +++++++++------ delorean_mem_qe/src/segment.rs | 142 +++++++++++++++++++++----------- 2 files changed, 134 insertions(+), 73 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e8bfc3dbd7..e3c631642f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -120,10 +120,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - // if i < 363 { - // i += 1; - // continue; - // } + if i < 363 { + i += 1; + continue; + } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -485,6 +485,7 @@ fn time_group_by_multi_agg_count(store: &Store) { &[], vec!["status".to_string(), "method".to_string()], vec![("counter".to_string(), AggregateType::Count)], + 0, strat, ); @@ -529,6 +530,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { &[], vec!["env".to_string(), "role".to_string()], vec![("counter".to_string(), AggregateType::Count)], + 0, strat, ); @@ -547,29 +549,40 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { } fn time_window_agg_sorted_count(store: &Store) { - let repeat = 10; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; - let segments = store.segments(); - for _ in 0..repeat { - let now = std::time::Instant::now(); + let strats = vec![ + // GroupingStrategy::HashGroup, + // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::SortGroup, + // GroupingStrategy::SortGroupConcurrent, + ]; - let groups = segments.window_agg_eq( - (1589000000000001, 1590044410000000), - &[], - vec!["env".to_string(), "role".to_string()], - vec![("counter".to_string(), AggregateType::Count)], - 60000000 * 10, // 10 minutes + for strat in &strats { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["env".to_string(), "role".to_string()], + vec![("counter".to_string(), AggregateType::Count)], + 60000000 * 10, // 10 minutes, + strat, + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_window_agg_sorted_count {:?} ran {:?} in {:?} {:?} / call {:?}", + strat, + repeat, + total_time, + total_time / repeat, + total_max ); - - total_time += now.elapsed(); - total_max += groups.len(); } - println!( - "time_window_agg_sorted_count ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index e99c7e0181..bac015537e 100644 --- a/delorean_mem_qe/src/segment.rs 
+++ b/delorean_mem_qe/src/segment.rs @@ -417,6 +417,7 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, column::Aggregate)>> { if self.group_key_sorted(group_columns) { log::info!("group key is already sorted {:?}", group_columns); @@ -425,13 +426,16 @@ impl Segment { predicates, group_columns, aggregates, + window, ) } else { + log::info!("group key needs sorting {:?}", group_columns); self.aggregate_by_group_with_sort_unsorted( time_range, predicates, group_columns, aggregates, + window, ) } } @@ -442,7 +446,17 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + log::debug!("aggregate_by_group_with_sort_unsorted called"); + + if window > 0 { + // last column on group key should be time. + assert_eq!(group_columns[group_columns.len() - 1], "time"); + } else { + assert_ne!(group_columns[group_columns.len() - 1], "time"); + } + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -543,9 +557,17 @@ impl Segment { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. + let group_itrs_len = &group_itrs.len(); let mut last_group_row = group_itrs .iter_mut() - .map(|itr| itr.next().unwrap()) + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) .collect::>(); let mut curr_group_row = last_group_row.clone(); @@ -575,8 +597,17 @@ impl Segment { while processed_rows < *total_rows { // update next group key. let mut group_key_changed = false; - for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { - let next_v = itr.next().unwrap(); + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; if curr_v != &next_v { group_key_changed = true; } @@ -615,7 +646,7 @@ impl Segment { // Emit final row results.insert(last_group_row, cum_aggregates); - log::debug!("{:?}", results); + log::info!("({:?} rows processed) {:?}", processed_rows, results); // results BTreeMap::new() } @@ -628,7 +659,17 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + log::debug!("aggregate_by_group_with_sort_sorted called"); + + if window > 0 { + // last column on group key should be time. + assert_eq!(group_columns[group_columns.len() - 1], "time"); + } else { + assert_ne!(group_columns[group_columns.len() - 1], "time"); + } + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -679,9 +720,17 @@ impl Segment { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. 
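Both the sorted and unsorted paths now share a contract that is asserted up front: a windowed call (`window > 0`) must arrive with "time" appended as the final group column, and a non-windowed call must not group on "time" directly. A minimal restatement of that check, using `last()` so an empty group key is handled rather than panicking on the index:

```rust
fn check_group_key(group_columns: &[String], window: i64) {
    if window > 0 {
        // the window function is applied to the trailing time column
        assert_eq!(group_columns.last().map(String::as_str), Some("time"));
    } else {
        assert_ne!(group_columns.last().map(String::as_str), Some("time"));
    }
}

fn main() {
    let windowed = vec!["env".to_string(), "role".to_string(), "time".to_string()];
    check_group_key(&windowed, 60_000_000 * 10);

    let plain = vec!["env".to_string(), "role".to_string()];
    check_group_key(&plain, 0);
}
```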
+ let group_itrs_len = &group_itrs.len(); let mut last_group_row = group_itrs .iter_mut() - .map(|itr| itr.next().unwrap()) + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) .collect::>(); let mut curr_group_row = last_group_row.clone(); @@ -695,8 +744,17 @@ impl Segment { while processed_rows < *total_rows { // update next group key. let mut group_key_changed = false; - for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { - let next_v = itr.next().unwrap(); + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; if curr_v != &next_v { group_key_changed = true; } @@ -749,7 +807,7 @@ impl Segment { } } - let key = last_group_row.clone(); + let key = last_group_row; results.insert(key, group_key_aggregates); log::info!("({:?} rows processed) {:?}", processed_rows, results); @@ -1447,6 +1505,7 @@ impl<'a> Segments<'a> { predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, + window: i64, strategy: &GroupingStrategy, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; @@ -1456,48 +1515,28 @@ impl<'a> Segments<'a> { match strategy { GroupingStrategy::HashGroup => { - return self.read_group_eq_hash( - time_range, - predicates, - group_columns, - aggregates, - false, - ) + self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, false) } GroupingStrategy::HashGroupConcurrent => { - return self.read_group_eq_hash( - time_range, - predicates, - group_columns, - aggregates, - true, - ) - } - GroupingStrategy::SortGroup => { - return self.read_group_eq_sort( - time_range, - predicates, - group_columns, - aggregates, - false, - ) - } - GroupingStrategy::SortGroupConcurrent => { - return self.read_group_eq_sort( - time_range, - predicates, - group_columns, - aggregates, - true, - ) + self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, true) } + GroupingStrategy::SortGroup => self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + window, + false, + ), + GroupingStrategy::SortGroupConcurrent => self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + window, + true, + ), } - - // TODO(edd): merge results - not expensive really... 
- // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - // BTreeMap::new(); - - // cum_results } fn read_group_eq_hash( @@ -1581,10 +1620,16 @@ impl<'a> Segments<'a> { &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, + mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, + window: i64, concurrent: bool, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + if window > 0 { + // add time column to the group key + group_columns.push("time".to_string()); + } + if concurrent { let group_columns_arc = std::sync::Arc::new(group_columns); let aggregates_arc = std::sync::Arc::new(aggregates); @@ -1602,6 +1647,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using multi-threaded sort in {:?}", @@ -1622,6 +1668,7 @@ impl<'a> Segments<'a> { predicates, &group_columns_arc.clone(), &aggregates_arc.clone(), + window, ); log::info!( "processed segment {:?} using multi-threaded sort in {:?}", @@ -1643,6 +1690,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using single-threaded sort in {:?}", From 6e8e11c09cb4f3c9a10b558d580486ca9b610768 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 15:09:51 +0100 Subject: [PATCH 39/73] refactor: move group methods to sort/stream --- delorean_mem_qe/src/bin/main.rs | 2 +- delorean_mem_qe/src/segment.rs | 201 +++++++++++++++----------------- 2 files changed, 98 insertions(+), 105 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e3c631642f..9fcc3590cc 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -567,7 +567,7 @@ fn time_window_agg_sorted_count(store: &Store) { let groups = segments.read_group_eq( (1589000000000001, 1590044410000000), &[], - vec!["env".to_string(), "role".to_string()], + vec!["env".to_string(), "role".to_string(), "path".to_string()], vec![("counter".to_string(), AggregateType::Count)], 60000000 * 10, // 10 minutes, strat, diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index bac015537e..fa298fe74f 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -411,36 +411,7 @@ impl Segment { BTreeMap::new() } - pub fn aggregate_by_group_with_sort( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - if self.group_key_sorted(group_columns) { - log::info!("group key is already sorted {:?}", group_columns); - self.aggregate_by_group_with_sort_sorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } else { - log::info!("group key needs sorting {:?}", group_columns); - self.aggregate_by_group_with_sort_unsorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } - } - - fn aggregate_by_group_with_sort_unsorted( + pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], @@ -651,9 +622,13 @@ impl Segment { BTreeMap::new() } - // this method assumes that the segment's columns are sorted such that a - // sort of columns is not required. - fn aggregate_by_group_with_sort_sorted( + // Executes aggregates grouping by group_columns. 
If window is positive then + // a windowed aggregate result set is produced. + // + // `aggregate_by_group_using_stream` assumes that all columns being grouped + // on are part of the overall segment sort, therefore it does no sorting or + // hashing, and just streams aggregates out in order. + pub fn aggregate_by_group_using_stream( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], @@ -1641,19 +1616,36 @@ impl<'a> Segments<'a> { let aggregates = aggregates_arc.clone(); scope.spawn(move |_| { + let sorted = segment.group_key_sorted(&group_columns); + let now = std::time::Instant::now(); - segment.aggregate_by_group_with_sort( - time_range, - predicates, - &group_columns, - &aggregates, - window, - ); - log::info!( - "processed segment {:?} using multi-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) + if sorted { + segment.aggregate_by_group_using_stream( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded STREAM in {:?}", + segment.time_range(), + now.elapsed() + ) + } else { + segment.aggregate_by_group_using_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded SORT in {:?}", + segment.time_range(), + now.elapsed() + ) + } }); } }) @@ -1662,19 +1654,38 @@ impl<'a> Segments<'a> { let rem = self.segments.len() % THREADS; for segment in &self.segments[self.segments.len() - rem..] { + let group_columns = group_columns_arc.clone(); + let aggregates = aggregates_arc.clone(); + let sorted = segment.group_key_sorted(&group_columns); + let now = std::time::Instant::now(); - segment.aggregate_by_group_with_sort( - time_range, - predicates, - &group_columns_arc.clone(), - &aggregates_arc.clone(), - window, - ); - log::info!( - "processed segment {:?} using multi-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) + if sorted { + segment.aggregate_by_group_using_stream( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded STREAM in {:?}", + segment.time_range(), + now.elapsed() + ) + } else { + segment.aggregate_by_group_using_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded SORT in {:?}", + segment.time_range(), + now.elapsed() + ) + } } // TODO(edd): aggregate the aggregates. 
not expensive @@ -1684,19 +1695,36 @@ impl<'a> Segments<'a> { // Single threaded for segment in &self.segments { + let sorted = segment.group_key_sorted(&group_columns); + let now = std::time::Instant::now(); - segment.aggregate_by_group_with_sort( - time_range, - predicates, - &group_columns, - &aggregates, - window, - ); - log::info!( - "processed segment {:?} using single-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) + if sorted { + segment.aggregate_by_group_using_stream( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using single-threaded STREAM in {:?}", + segment.time_range(), + now.elapsed() + ) + } else { + segment.aggregate_by_group_using_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using single-threaded SORT in {:?}", + segment.time_range(), + now.elapsed() + ) + } } BTreeMap::new() @@ -1723,41 +1751,6 @@ impl<'a> Segments<'a> { min_min } - pub fn window_agg_eq( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, - aggregates: Vec<(String, AggregateType)>, - window: i64, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { - let (min, max) = time_range; - if max <= min { - panic!("max <= min"); - } - - // add time column to the group key - let mut group_columns = group_columns.clone(); - group_columns.push("time".to_string()); - - for segment in &self.segments { - let now = std::time::Instant::now(); - segment.window_aggregate_with_sort( - time_range, - predicates, - &group_columns, - &aggregates, - window, - ); - log::info!( - "processed segment {:?} using windowed single-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) - } - BTreeMap::new() - } - /// Returns the maximum value for a column in a set of segments. 
pub fn column_max(&self, column_name: &str) -> Option { if self.segments.is_empty() { From a1d57270fd83d4548f1a9a3d49c087dcc7825f04 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 20:41:20 +0100 Subject: [PATCH 40/73] refactor: DRY up grouped aggregates to do windowing --- delorean_mem_qe/src/bin/main.rs | 10 +- delorean_mem_qe/src/column.rs | 130 ++++--- delorean_mem_qe/src/segment.rs | 605 ++++---------------------------- 3 files changed, 164 insertions(+), 581 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9fcc3590cc..be480e8def 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -71,7 +71,7 @@ fn main() { // time_group_single_with_pred(&store); // time_group_by_multi_agg_count(&store); // time_group_by_multi_agg_sorted_count(&store); - time_window_agg_sorted_count(&store); + time_window_agg_count(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -548,7 +548,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { } } -fn time_window_agg_sorted_count(store: &Store) { +fn time_window_agg_count(store: &Store) { let strats = vec![ // GroupingStrategy::HashGroup, // GroupingStrategy::HashGroupConcurrent, @@ -557,7 +557,7 @@ fn time_window_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 10; + let repeat = 10000; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -567,7 +567,7 @@ fn time_window_agg_sorted_count(store: &Store) { let groups = segments.read_group_eq( (1589000000000001, 1590044410000000), &[], - vec!["env".to_string(), "role".to_string(), "path".to_string()], + vec!["env".to_string(), "role".to_string()], vec![("counter".to_string(), AggregateType::Count)], 60000000 * 10, // 10 minutes, strat, @@ -577,7 +577,7 @@ fn time_window_agg_sorted_count(store: &Store) { total_max += groups.len(); } println!( - "time_window_agg_sorted_count {:?} ran {:?} in {:?} {:?} / call {:?}", + "time_window_agg_count {:?} ran {:?} in {:?} {:?} / call {:?}", strat, repeat, total_time, diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 3ce8ee0357..bc89cb23bd 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -156,35 +156,15 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { } } -// impl<'a> std::ops::Add<&Scalar<'a>> for Aggregate<'a> { -// type Output = Aggregate<'a>; - -// fn add(self, _rhs: &Scalar<'a>) -> Self::Output { -// match _rhs { -// Scalar::String(v) => {} -// Scalar::Float(v) => {} -// Scalar::Integer(v) => {} -// } -// // match self { -// // Self::Count(c) => { -// // match -// // if let Scalar::Count(other) = _rhs { -// // return Self::Count(c + other); -// // } else { -// // panic!("invalid"); -// // }; -// // } -// // Self::Sum(s) => { -// // if let Self::Sum(other) = _rhs { -// // return Self::Sum(s + other); -// // } else { -// // panic!("invalid"); -// // }; -// // } -// // } -// } -// } - +pub trait AggregatableByRange { + fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate; +} +/// A Vector is a materialised vector of values from a column. 
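The new `AggregatableByRange` trait is the hinge of this refactor: the streaming group-by only ever asks one question, "aggregate the rows from a to b", so any source that can answer it can feed the stream, whether a whole `Column` (the pre-sorted path) or a materialised `Vector` (the sort-first path). A reduced sketch of the idea, with simplified enums and a float slice standing in for both implementors:

```rust
enum AggregateType {
    Count,
    Sum,
}

#[derive(Debug)]
enum Aggregate {
    Count(u64),
    Sum(f64),
}

trait AggregatableByRange {
    fn aggregate_by_id_range(&self, agg: &AggregateType, from: usize, to: usize) -> Aggregate;
}

impl AggregatableByRange for &[f64] {
    fn aggregate_by_id_range(&self, agg: &AggregateType, from: usize, to: usize) -> Aggregate {
        match agg {
            AggregateType::Count => Aggregate::Count((to - from) as u64),
            AggregateType::Sum => Aggregate::Sum(self[from..to].iter().sum()),
        }
    }
}

// generic over the source, in the same spirit as `stream_grouped_aggregates` below
fn aggregate_run(
    col: impl AggregatableByRange,
    agg: &AggregateType,
    from: usize,
    to: usize,
) -> Aggregate {
    col.aggregate_by_id_range(agg, from, to)
}

fn main() {
    let values: &[f64] = &[1.0, 2.0, 3.0, 4.0];
    println!("{:?}", aggregate_run(values, &AggregateType::Sum, 1, 3)); // Sum(5.0)
}
```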
pub enum Vector<'a> { String(Vec<&'a Option>), Float(Vec), @@ -192,27 +172,48 @@ pub enum Vector<'a> { } impl<'a> Vector<'a> { - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn len(&self) -> usize { - match self { - Self::String(v) => v.len(), - Self::Float(v) => v.len(), - Self::Integer(v) => v.len(), + pub fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + match agg_type { + AggregateType::Count => { + Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + } + AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), } } - pub fn get(&self, i: usize) -> Scalar<'a> { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar { match self { - // FIXME(edd): SORT THIS OPTION OUT - Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), - Self::Float(v) => Scalar::Float(v[i]), - Self::Integer(v) => Scalar::Integer(v[i]), + Vector::String(_) => { + panic!("can't sum strings...."); + } + Vector::Float(values) => { + let mut res = 0.0; + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Scalar::Float(res) + } + Vector::Integer(values) => { + let mut res = 0; + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Scalar::Integer(res) + } } } + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + to_row_id - from_row_id + } + pub fn extend(&mut self, other: Self) { match self { Self::String(v) => { @@ -239,6 +240,27 @@ impl<'a> Vector<'a> { } } + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> usize { + match self { + Self::String(v) => v.len(), + Self::Float(v) => v.len(), + Self::Integer(v) => v.len(), + } + } + + pub fn get(&self, i: usize) -> Scalar<'a> { + match self { + // FIXME(edd): SORT THIS OPTION OUT + Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), + Self::Float(v) => Scalar::Float(v[i]), + Self::Integer(v) => Scalar::Integer(v[i]), + } + } + pub fn swap(&mut self, a: usize, b: usize) { match self { Self::String(v) => { @@ -254,6 +276,17 @@ impl<'a> Vector<'a> { } } +impl AggregatableByRange for &Vector<'_> { + fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + Vector::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) + } +} + /// VectorIterator allows a `Vector` to be iterated. Until vectors are drained /// Scalar values are emitted. 
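On the `TODO(edd): check asm to see if it's vectorising` above: the manual accumulation loop and the equivalent iterator form compile to the same sequential adds for floats, since floating point addition is not associative and the compiler will not reorder it into SIMD lanes on its own; reassociation would need explicit chunking or integer data. Note also that `count_by_id_range` is simply the width of the range, a row count rather than a non-null count. A micro-sketch of the two equivalent sum forms:

```rust
fn sum_loop(values: &[f64], from: usize, to: usize) -> f64 {
    let mut res = 0.0;
    for v in &values[from..to] {
        res += *v;
    }
    res
}

fn sum_iter(values: &[f64], from: usize, to: usize) -> f64 {
    values[from..to].iter().sum()
}

fn main() {
    let v = [1.0, 2.0, 4.0, 8.0];
    // identical results and, without fast-math, essentially identical codegen
    assert_eq!(sum_loop(&v, 1, 3), sum_iter(&v, 1, 3)); // both 6.0
}
```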
pub struct VectorIterator<'a> { @@ -883,6 +916,17 @@ impl Column { } } +impl AggregatableByRange for &Column { + fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + Column::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) + } +} + impl From<&[f64]> for Column { fn from(values: &[f64]) -> Self { Self::Float(Float::from(values)) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index fa298fe74f..a9da3a6543 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -418,7 +418,7 @@ impl Segment { group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + ) -> Vec { log::debug!("aggregate_by_group_with_sort_unsorted called"); if window > 0 { @@ -433,10 +433,9 @@ impl Segment { if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { - return BTreeMap::new(); + return vec![]; } let total_rows = &filtered_row_ids.cardinality(); - // println!("TOTAL FILTERED ROWS {:?}", total_rows); let filtered_row_ids_vec = filtered_row_ids .to_vec() @@ -508,7 +507,7 @@ impl Segment { } log::debug!("time checking sort {:?}", now.elapsed()); - let mut group_itrs = all_columns + let group_itrs = all_columns .iter() .take(group_columns.len()) // only use grouping columns .map(|vector| { @@ -520,106 +519,16 @@ impl Segment { }) .collect::>(); - let mut aggregate_itrs = all_columns + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); + for (sorted_vector, (col_name, agg_type)) in all_columns .iter() - .skip(group_columns.len()) // only use grouping columns - .map(|v| column::VectorIterator::new(v)) - .collect::>(); - - // this tracks the last seen group key row. When it changes we can emit - // the grouped aggregates. - let group_itrs_len = &group_itrs.len(); - let mut last_group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_itrs_len - 1 && window > 0 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); - - let mut curr_group_row = last_group_row.clone(); - - // this tracks the last row for each column we are aggregating. - let last_agg_row: Vec = aggregate_itrs - .iter_mut() - .map(|itr| itr.next().unwrap()) - .collect(); - - // this keeps the current cumulative aggregates for the columns we - // are aggregating. - let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates - .iter() - .zip(last_agg_row.iter()) - .map(|((col_name, agg_type), curr_agg)| { - let agg = match agg_type { - AggregateType::Count => column::Aggregate::Count(1), - AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), - }; - (col_name.clone(), agg) - }) - .collect(); - - let mut results = BTreeMap::new(); - let mut processed_rows = 1; - while processed_rows < *total_rows { - // update next group key. - let mut group_key_changed = false; - for (i, (curr_v, itr)) in curr_group_row - .iter_mut() - .zip(group_itrs.iter_mut()) - .enumerate() - { - let next_v = if i == group_itrs_len - 1 && window > 0 { - // time column - apply window function - itr.next().unwrap() / window * window - } else { - *itr.next().unwrap() - }; - if curr_v != &next_v { - group_key_changed = true; - } - *curr_v = next_v; - } - - // group key changed - emit group row and aggregates. 
- if group_key_changed { - let key = last_group_row.clone(); - results.insert(key, cum_aggregates.clone()); - - // update group key - last_group_row = curr_group_row.clone(); - - // reset cumulative aggregates - for (_, agg) in cum_aggregates.iter_mut() { - match agg { - column::Aggregate::Count(c) => { - *c = 0; - } - column::Aggregate::Sum(s) => s.reset(), - } - } - } - - // update aggregates - for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { - let (_, curr_agg) = bind.0; - let next_value = bind.1.next().unwrap(); - curr_agg.update_with(next_value); - } - - processed_rows += 1; + .skip(group_columns.len()) + .zip(aggregates.iter()) + { + aggregate_cols.push((col_name, agg_type, sorted_vector)); } - // Emit final row - results.insert(last_group_row, cum_aggregates); - - log::info!("({:?} rows processed) {:?}", processed_rows, results); - // results - BTreeMap::new() + Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) } // Executes aggregates grouping by group_columns. If window is positive then @@ -628,14 +537,14 @@ impl Segment { // `aggregate_by_group_using_stream` assumes that all columns being grouped // on are part of the overall segment sort, therefore it does no sorting or // hashing, and just streams aggregates out in order. - pub fn aggregate_by_group_using_stream( + pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + ) -> Vec> { log::debug!("aggregate_by_group_with_sort_sorted called"); if window > 0 { @@ -650,7 +559,7 @@ impl Segment { if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { - return BTreeMap::new(); + return vec![]; } let total_rows = &filtered_row_ids.cardinality(); @@ -677,12 +586,7 @@ impl Segment { } } - let mut new_agg_cols = Vec::with_capacity(aggregates.len()); - for (column_name, agg_type) in aggregates { - new_agg_cols.push((column_name, agg_type, self.column(&column_name))); - } - - let mut group_itrs = group_column_encoded_values + let group_itrs = group_column_encoded_values .iter() .map(|vector| { if let column::Vector::Integer(v) = vector { @@ -693,6 +597,22 @@ impl Segment { }) .collect::>(); + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); + for (column_name, agg_type) in aggregates { + aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); + } + + Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) + } + + // Once the rows necessary for doing a (windowed) grouped aggregate are ready + // this method will build a result set of aggregates in a streaming way. + pub fn stream_grouped_aggregates<'a>( + mut group_itrs: Vec>, + aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, + total_rows: usize, + window: i64, + ) -> Vec> { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. let group_itrs_len = &group_itrs.len(); @@ -710,13 +630,14 @@ impl Segment { let mut curr_group_row = last_group_row.clone(); - let mut results = BTreeMap::new(); + let mut results = vec![]; + let mut processed_rows = 1; let mut group_key_start_row_id = 0; let mut group_size = 0; - while processed_rows < *total_rows { + while processed_rows < total_rows { // update next group key. 
let mut group_key_changed = false; for (i, (curr_v, itr)) in curr_group_row @@ -738,22 +659,23 @@ impl Segment { // group key changed - emit group row and aggregates. if group_key_changed { - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } + let mut group_key_aggregates = Vec::with_capacity(aggregate_cols.len()); + for (name, agg_type, vector) in &aggregate_cols { + let agg_result = vector.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + + let col_name = name.to_owned().clone(); + group_key_aggregates.push((col_name, agg_result)); } let key = last_group_row.clone(); - results.insert(key, group_key_aggregates); + results.push(GroupedAggregates { + group_key: key, + aggregates: group_key_aggregates, + }); // update group key last_group_row = curr_group_row.clone(); @@ -768,410 +690,27 @@ impl Segment { } // Emit final row - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } + let mut group_key_aggregates = Vec::with_capacity(aggregate_cols.len()); + for (name, agg_type, vector) in &aggregate_cols { + let agg_result = vector.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + + // TODO(edd): fix weirdness + let col_name = name.to_owned().clone(); + group_key_aggregates.push((col_name, agg_result)); } - let key = last_group_row; - results.insert(key, group_key_aggregates); + results.push(GroupedAggregates { + group_key: last_group_row, + aggregates: group_key_aggregates, + }); log::info!("({:?} rows processed) {:?}", processed_rows, results); // results - BTreeMap::new() - } - - pub fn window_aggregate_with_sort( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - if self.group_key_sorted(group_columns) { - log::info!("group key is already sorted {:?}", group_columns); - self.window_aggregate_with_sort_sorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } else { - log::info!("group key needs sorting {:?}", group_columns); - self.window_aggregate_with_sort_unsorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } - } - - // this method assumes that the segment's columns are sorted such that a - // sort of columns is not required. 
- fn window_aggregate_with_sort_sorted( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - // filter on predicates and time - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { - filtered_row_ids = row_ids; - } else { - return BTreeMap::new(); - } - let total_rows = &filtered_row_ids.cardinality(); - - let filtered_row_ids_vec = filtered_row_ids - .to_vec() - .iter() - .map(|v| *v as usize) - .collect::>(); - - // materialise all encoded values for the matching rows in the columns - // we are grouping on and store each group as an iterator. - let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); - for group_column in group_columns { - if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); - assert_eq!( - filtered_row_ids.cardinality() as usize, - encoded_values.len() - ); - - group_column_encoded_values.push(encoded_values); - } else { - panic!("need to handle no results for filtering/grouping..."); - } - } - - let mut new_agg_cols = Vec::with_capacity(aggregates.len()); - for (column_name, agg_type) in aggregates { - new_agg_cols.push((column_name, agg_type, self.column(&column_name))); - } - - let mut group_itrs = group_column_encoded_values - .iter() - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values or time"); - } - }) - .collect::>(); - - // this tracks the last seen group key row. When it changes we can emit - // the grouped aggregates. - let group_itrs_len = &group_itrs.len(); - let mut last_group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_itrs_len - 1 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); - - let mut curr_group_row = last_group_row.clone(); - - let mut results = BTreeMap::new(); - let mut processed_rows = 1; - - let mut group_key_start_row_id = 0; - let mut group_size = 0; - - while processed_rows < *total_rows { - // update next group key. - let mut group_key_changed = false; - for (i, (curr_v, itr)) in curr_group_row - .iter_mut() - .zip(group_itrs.iter_mut()) - .enumerate() - { - let next_v = if i == group_itrs_len - 1 { - // time column - apply window function - itr.next().unwrap() / window * window - } else { - *itr.next().unwrap() - }; - if *curr_v != next_v { - group_key_changed = true; - } - *curr_v = next_v; - } - - // group key changed - emit group row and aggregates. - if group_key_changed { - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } - } - - let key = last_group_row.clone(); - results.insert(key, group_key_aggregates); - - // update group key - last_group_row = curr_group_row.clone(); - - // reset counters tracking group key row range - group_key_start_row_id = processed_rows as usize; // TODO(edd) - could be an off-by-one? 
- group_size = 0; - } - - group_size += 1; - processed_rows += 1; - } - - // Emit final row - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } - } - - let key = last_group_row; - results.insert(key, group_key_aggregates); - - log::info!("({:?} rows processed) {:?}", processed_rows, results); - // results - BTreeMap::new() - } - - fn window_aggregate_with_sort_unsorted( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - // filter on predicates and time - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { - filtered_row_ids = row_ids; - } else { - return BTreeMap::new(); - } - let total_rows = &filtered_row_ids.cardinality(); - - let filtered_row_ids_vec = filtered_row_ids - .to_vec() - .iter() - .map(|v| *v as usize) - .collect::>(); - - // materialise all encoded values for the matching rows in the columns - // we are grouping on and store each group as an iterator. - let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); - for group_column in group_columns { - if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); - assert_eq!( - filtered_row_ids.cardinality() as usize, - encoded_values.len() - ); - group_column_encoded_values.push(Some(encoded_values)); - } else { - group_column_encoded_values.push(None); - } - } - - // TODO(edd): we could do this with an iterator I expect. - // - // materialise all decoded values for the rows in the columns we are - // aggregating on. - let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); - for (column_name, _) in aggregates { - if let Some(column) = self.column(&column_name) { - let decoded_values = column.values(&filtered_row_ids_vec); - assert_eq!( - filtered_row_ids.cardinality() as usize, - decoded_values.len() - ); - aggregate_column_decoded_values.push((column_name, Some(decoded_values))); - } else { - aggregate_column_decoded_values.push((column_name, None)); - } - } - - let mut all_columns = Vec::with_capacity( - group_column_encoded_values.len() + aggregate_column_decoded_values.len(), - ); - - for gc in group_column_encoded_values { - if let Some(p) = gc { - all_columns.push(p); - } else { - panic!("need to handle no results for filtering/grouping..."); - } - } - - for ac in aggregate_column_decoded_values { - if let (_, Some(p)) = ac { - all_columns.push(p); - } else { - panic!("need to handle no results for filtering/grouping..."); - } - } - - let now = std::time::Instant::now(); - if self.group_key_sorted(&group_columns) { - panic!("This shouldn't be called!!!"); - } else { - // now sort on the first grouping columns. Right now the order doesn't matter... 
- let group_col_sort_order = &(0..group_columns.len()).collect::>(); - super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - } - log::debug!("time checking sort {:?}", now.elapsed()); - - let mut group_itrs = all_columns - .iter() - .take(group_columns.len()) // only use grouping columns - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } - }) - .collect::>(); - - let mut aggregate_itrs = all_columns - .iter() - .skip(group_columns.len()) // only use grouping columns - .map(|v| column::VectorIterator::new(v)) - .collect::>(); - - // this tracks the last seen group key row. When it changes we can emit - // the grouped aggregates. - let mut last_group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_columns.len() - 1 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); - - let mut curr_group_row = last_group_row.clone(); - - // this tracks the last row for each column we are aggregating. - let last_agg_row: Vec = aggregate_itrs - .iter_mut() - .map(|itr| itr.next().unwrap()) - .collect(); - - // this keeps the current cumulative aggregates for the columns we - // are aggregating. - let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates - .iter() - .zip(last_agg_row.iter()) - .map(|((col_name, agg_type), curr_agg)| { - let agg = match agg_type { - AggregateType::Count => column::Aggregate::Count(1), - AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), - }; - (col_name.clone(), agg) - }) - .collect(); - - let mut results = BTreeMap::new(); - let mut processed_rows = 1; - while processed_rows < *total_rows { - // update next group key. - let mut group_key_changed = false; - for (i, (curr_v, itr)) in curr_group_row - .iter_mut() - .zip(group_itrs.iter_mut()) - .enumerate() - { - let next_v = if i == group_columns.len() - 1 { - // time column - apply window function - itr.next().unwrap() / window * window - } else { - *itr.next().unwrap() - }; - if curr_v != &next_v { - group_key_changed = true; - } - *curr_v = next_v; - } - - // group key changed - emit group row and aggregates. 
- if group_key_changed { - let key = last_group_row.clone(); - results.insert(key, cum_aggregates.clone()); - - // update group key - last_group_row = curr_group_row.clone(); - - // reset cumulative aggregates - for (_, agg) in cum_aggregates.iter_mut() { - match agg { - column::Aggregate::Count(c) => { - *c = 0; - } - column::Aggregate::Sum(s) => s.reset(), - } - } - } - - // update aggregates - for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { - let (_, curr_agg) = bind.0; - let next_value = bind.1.next().unwrap(); - curr_agg.update_with(next_value); - } - - processed_rows += 1; - } - - // Emit final row - results.insert(last_group_row, cum_aggregates); - - log::info!("({:?} rows processed) {:?}", processed_rows, results); - // results - BTreeMap::new() + vec![] } pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { @@ -1399,12 +938,6 @@ impl SegmentMetaData { } } -#[derive(Debug, Clone)] -pub enum Aggregate { - Count, - Sum, -} - pub struct Segments<'a> { segments: Vec<&'a Segment>, } @@ -1482,7 +1015,7 @@ impl<'a> Segments<'a> { aggregates: Vec<(String, AggregateType)>, window: i64, strategy: &GroupingStrategy, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); @@ -1521,7 +1054,7 @@ impl<'a> Segments<'a> { group_columns: Vec, aggregates: Vec<(String, AggregateType)>, concurrent: bool, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { if concurrent { let group_columns_arc = std::sync::Arc::new(group_columns); let aggregates_arc = std::sync::Arc::new(aggregates); @@ -1599,7 +1132,7 @@ impl<'a> Segments<'a> { aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1845,6 +1378,12 @@ pub enum GroupingStrategy { SortGroupConcurrent, } +#[derive(Debug)] +pub struct GroupedAggregates<'a> { + pub group_key: Vec, + pub aggregates: Vec<(String, column::Aggregate<'a>)>, +} + #[cfg(test)] mod test { From bbebee654af07962d94b88898c3043b80a4a510d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 28 Aug 2020 11:47:26 +0100 Subject: [PATCH 41/73] feat: support windowed aggregates with hash sort --- delorean_mem_qe/src/bin/main.rs | 20 +++--- delorean_mem_qe/src/segment.rs | 112 ++++++++++++++++++++------------ 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index be480e8def..5d021fb25b 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -69,8 +69,8 @@ fn main() { // time_sum_range(&store); // time_count_range(&store); // time_group_single_with_pred(&store); - // time_group_by_multi_agg_count(&store); - // time_group_by_multi_agg_sorted_count(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_sorted_count(&store); time_window_agg_count(&store); } @@ -120,10 +120,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 363 { - i += 1; - continue; - } + // if i < 363 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( 
rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -550,14 +550,14 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { fn time_window_agg_count(store: &Store) { let strats = vec![ - // GroupingStrategy::HashGroup, - // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - // GroupingStrategy::SortGroupConcurrent, + GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { - let repeat = 10000; + let repeat = 1; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index a9da3a6543..dddf06a0fe 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -227,12 +227,21 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, Option)>> { - // println!("working segment {:?}", time_range); // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. + log::debug!("aggregate_by_group_with_hash called"); + + if window > 0 { + // last column on group key should be time. + assert_eq!(group_columns[group_columns.len() - 1], "time"); + } else { + assert_ne!(group_columns[group_columns.len() - 1], "time"); + } + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -248,29 +257,20 @@ impl Segment { .iter() .map(|v| *v as usize) .collect::>(); - // println!("TOTAL FILTERED ROWS {:?}", total_rows); // materialise all encoded values for the matching rows in the columns // we are grouping on and store each group as an iterator. let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values: Vec; - if let column::Vector::Integer(vector) = - column.encoded_values(&filtered_row_ids_vec) - { - encoded_values = vector; - } else { - unimplemented!("currently you can only group on encoded string columns"); - } - + let encoded_values = column.encoded_values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() ); - group_column_encoded_values.push(Some(encoded_values)); + group_column_encoded_values.push(encoded_values); } else { - group_column_encoded_values.push(None); + panic!("need to handle no results for filtering/grouping..."); } } // println!("grouped columns {:?}", group_column_encoded_values); @@ -304,9 +304,12 @@ impl Segment { // filtering stage we will just emit None. 
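One detail worth pulling out before the iterator-building code that follows: all of the windowing added in this patch reduces to truncating the time column's value to its window boundary with integer arithmetic, the `itr.next().unwrap() / window * window` expression seen throughout. A standalone sketch of that arithmetic, assuming non-negative microsecond timestamps (so that integer division floors):

// Truncate a timestamp to the start of its window. For non-negative `t`,
// integer division floors, so every t in [k*window, (k+1)*window) maps
// to k*window.
fn window_start(t: i64, window: i64) -> i64 {
    t / window * window
}

fn main() {
    // A 10 minute window in microseconds, as in the benchmarks above.
    let window = 60_000_000 * 10;
    let t = 1_589_000_123_456_789_i64;
    let start = window_start(t, window);
    assert_eq!(start % window, 0);
    assert!(start <= t && t - start < window);
    println!("{} falls in the window starting at {}", t, start);
}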
let mut group_itrs = group_column_encoded_values .iter() - .map(|x| match x { - Some(values) => Some(values.iter()), - None => None, + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } }) .collect::>(); @@ -321,10 +324,11 @@ impl Segment { }) .collect::>(); + // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: HashMap< - Vec>, + Vec, Vec<(&String, &AggregateType, Option)>, - > = HashMap::with_capacity(30000); + > = HashMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -332,22 +336,20 @@ impl Segment { .collect(); let mut processed_rows = 0; - while processed_rows < *total_rows { - let group_row: Vec> = group_itrs - .iter_mut() - .map(|x| match x { - Some(itr) => itr.next(), - None => None, - }) - .collect(); + let group_itrs_len = &group_itrs.len(); - // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs - // .iter_mut() - // .map(|&mut (col_name, ref mut itr)| match itr { - // Some(itr) => (col_name, itr.next()), - // None => (col_name, None), - // }) - // .collect(); + while processed_rows < *total_rows { + let group_row = group_itrs + .iter_mut() + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) + .collect::>(); // re-use aggregate_row vector. for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { @@ -407,7 +409,7 @@ impl Segment { } processed_rows += 1; } - log::debug!("{:?}", hash_table); + log::info!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() } @@ -428,6 +430,10 @@ impl Segment { assert_ne!(group_columns[group_columns.len() - 1], "time"); } + // TODO(edd): Perf - if there is no predicate and we want entire segment + // then it will be a lot faster to not build filtered_row_ids and just + // get all encoded values for each grouping column... + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -605,8 +611,9 @@ impl Segment { Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) } - // Once the rows necessary for doing a (windowed) grouped aggregate are ready - // this method will build a result set of aggregates in a streaming way. + // Once the rows necessary for doing a (windowed) grouped aggregate are + // available and appropriately sorted this method will build a result set of + // aggregates in a streaming way. 
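Before its definition below, a condensed sketch of what `stream_grouped_aggregates` does: when rows arrive sorted by group key, a group is complete the moment the key changes, so a single pass with O(1) state per group is enough. Simplified here to one pre-sorted key column and a count aggregate (illustrative code, not the crate's API):

fn grouped_counts(sorted_keys: &[i64]) -> Vec<(i64, u64)> {
    let mut results = Vec::new();
    let mut last_key = match sorted_keys.first() {
        Some(k) => *k,
        None => return results,
    };
    let mut count = 1;
    for &key in &sorted_keys[1..] {
        if key != last_key {
            results.push((last_key, count)); // group key changed - emit row
            last_key = key;
            count = 0;
        }
        count += 1;
    }
    results.push((last_key, count)); // emit the final group
    results
}

fn main() {
    let keys = vec![1, 1, 1, 2, 2, 5];
    assert_eq!(grouped_counts(&keys), vec![(1, 3), (2, 2), (5, 1)]);
}

The callers guarantee the ordering: `aggregate_by_group_using_stream` requires the segment itself to be sorted on the group columns, while the sort-based variant materialises and sorts the columns first; hash grouping remains the fallback for unsorted data.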
pub fn stream_grouped_aggregates<'a>( mut group_itrs: Vec>, aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, @@ -1022,12 +1029,22 @@ impl<'a> Segments<'a> { } match strategy { - GroupingStrategy::HashGroup => { - self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, false) - } - GroupingStrategy::HashGroupConcurrent => { - self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, true) - } + GroupingStrategy::HashGroup => self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + window, + false, + ), + GroupingStrategy::HashGroupConcurrent => self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + window, + true, + ), GroupingStrategy::SortGroup => self.read_group_eq_sort( time_range, predicates, @@ -1051,10 +1068,16 @@ impl<'a> Segments<'a> { &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, + mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, + window: i64, concurrent: bool, ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + if window > 0 { + // add time column to the group key + group_columns.push("time".to_string()); + } + if concurrent { let group_columns_arc = std::sync::Arc::new(group_columns); let aggregates_arc = std::sync::Arc::new(aggregates); @@ -1072,6 +1095,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using multi-threaded hash-grouping in {:?}", @@ -1092,6 +1116,7 @@ impl<'a> Segments<'a> { predicates, &group_columns_arc.clone(), &aggregates_arc.clone(), + window, ); log::info!( "processed segment {:?} using multi-threaded hash-grouping in {:?}", @@ -1113,6 +1138,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using single-threaded hash-grouping in {:?}", From cfa0ef9c2302a4a343951854a3e57c5e277aada9 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 3 Sep 2020 15:53:30 +0100 Subject: [PATCH 42/73] perf: improve group sort --- delorean_mem_qe/src/segment.rs | 50 +++++++++++++++------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index dddf06a0fe..c058df01f1 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -335,21 +335,19 @@ impl Segment { .take(aggregate_itrs.len()) .collect(); - let mut processed_rows = 0; let group_itrs_len = &group_itrs.len(); + let mut group_key: Vec = vec![0; *group_itrs_len]; + let mut processed_rows = 0; while processed_rows < *total_rows { - let group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_itrs_len - 1 && window > 0 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); + group_itrs.iter_mut().enumerate().for_each(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + group_key[i] = itr.next().unwrap() / window * window; + } else { + group_key[i] = *itr.next().unwrap(); + } + }); // re-use aggregate_row vector. for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { @@ -359,17 +357,16 @@ impl Segment { } } - // Lookup the group key in the hash map - if it's empty then insert - // a place-holder for each aggregate being executed. 
- let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { - // TODO COULD BE MAP/COLLECT + // This is cheaper than allocating a key and using the entry API + if !hash_table.contains_key(&group_key) { let mut agg_results: Vec<(&String, &AggregateType, Option)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option } - agg_results - }); + hash_table.insert(group_key.clone(), agg_results); + } + let group_key_entry = hash_table.get_mut(&group_key).unwrap(); // Update aggregates - we process each row value and for each one // check which aggregates apply to it. @@ -409,7 +406,7 @@ impl Segment { } processed_rows += 1; } - log::info!("({:?} rows processed) {:?}", processed_rows, hash_table); + log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() } @@ -551,7 +548,7 @@ impl Segment { aggregates: &[(String, AggregateType)], window: i64, ) -> Vec> { - log::debug!("aggregate_by_group_with_sort_sorted called"); + log::debug!("aggregate_by_group_using_stream called"); if window > 0 { // last column on group key should be time. @@ -674,13 +671,11 @@ impl Segment { group_key_start_row_id + group_size, ); - let col_name = name.to_owned().clone(); - group_key_aggregates.push((col_name, agg_result)); + group_key_aggregates.push((*name, agg_result)); } - let key = last_group_row.clone(); results.push(GroupedAggregates { - group_key: key, + group_key: last_group_row, aggregates: group_key_aggregates, }); @@ -706,8 +701,7 @@ impl Segment { ); // TODO(edd): fix weirdness - let col_name = name.to_owned().clone(); - group_key_aggregates.push((col_name, agg_result)); + group_key_aggregates.push((*name, agg_result)); } results.push(GroupedAggregates { @@ -715,7 +709,7 @@ impl Segment { aggregates: group_key_aggregates, }); - log::info!("({:?} rows processed) {:?}", processed_rows, results); + log::debug!("({:?} rows processed) {:?}", processed_rows, results); // results vec![] } @@ -1407,7 +1401,7 @@ pub enum GroupingStrategy { #[derive(Debug)] pub struct GroupedAggregates<'a> { pub group_key: Vec, - pub aggregates: Vec<(String, column::Aggregate<'a>)>, + pub aggregates: Vec<(&'a String, column::Aggregate<'a>)>, } #[cfg(test)] From cad5e45208346528ad02cd04dcac863f90faa037 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 4 Sep 2020 12:08:22 +0100 Subject: [PATCH 43/73] perf: add ability to get all encoded values --- delorean_mem_qe/src/column.rs | 28 ++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 45 +++++++++++++++++++++++---------- delorean_mem_qe/src/segment.rs | 36 +++++++++++++++++++++----- 3 files changed, 89 insertions(+), 20 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index bc89cb23bd..b3df18edca 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -537,6 +537,22 @@ impl Column { } } + /// Materialise all of the encoded values. + pub fn all_encoded_values(&self) -> Vector { + match self { + Column::String(c) => { + let now = std::time::Instant::now(); + let v = c.all_encoded_values(); + log::debug!("time getting all encoded values {:?}", now.elapsed()); + + log::debug!("dictionary {:?}", c.data.dictionary()); + Vector::Integer(v) + } + Column::Float(c) => Vector::Float(c.all_encoded_values()), + Column::Integer(c) => Vector::Integer(c.all_encoded_values()), + } + } + /// Given an encoded value for a row, materialise and return the decoded /// version. 
/// @@ -986,6 +1002,10 @@ impl String { self.data.encoded_values(row_ids) } + pub fn all_encoded_values(&self) -> Vec { + self.data.all_encoded_values() + } + /// Return the decoded value for an encoded ID. /// /// Panics if there is no decoded value for the provided id @@ -1037,6 +1057,10 @@ impl Float { self.data.encoded_values(row_ids) } + pub fn all_encoded_values(&self) -> Vec { + self.data.all_encoded_values() + } + pub fn scan_from(&self, row_id: usize) -> &[f64] { self.data.scan_from(row_id) } @@ -1106,6 +1130,10 @@ impl Integer { self.data.encoded_values(row_ids) } + pub fn all_encoded_values(&self) -> Vec { + self.data.all_encoded_values() + } + pub fn scan_from(&self, row_id: usize) -> &[i64] { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index d6a865a5f1..4b057cfc96 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -68,6 +68,12 @@ where self.values(row_ids) } + /// Return all encoded values. For this encoding this is just the decoded + /// values + pub fn all_encoded_values(&self) -> Vec { + self.values.clone() + } + // TODO(edd): fix this when added NULL support pub fn scan_from_until_some(&self, _row_id: usize) -> Option { unreachable!("to remove"); @@ -485,6 +491,26 @@ impl DictionaryRLE { out } + // values materialises a vector of references to all logical values in the + // encoding. + pub fn all_values(&mut self) -> Vec> { + let mut out: Vec> = Vec::with_capacity(self.total as usize); + + // build reverse mapping. + let mut idx_value = BTreeMap::new(); + for (k, v) in &self.entry_index { + idx_value.insert(v, k); + } + assert_eq!(idx_value.len(), self.entry_index.len()); + + for (idx, rl) in &self.run_lengths { + // TODO(edd): fix unwrap - we know that the value exists in map... + let v = idx_value.get(&idx).unwrap().as_ref(); + out.extend(iter::repeat(v).take(*rl as usize)); + } + out + } + /// Return the decoded value for an encoded ID. /// /// Panics if there is no decoded value for the provided id @@ -528,22 +554,13 @@ impl DictionaryRLE { out } - // values materialises a vector of references to all logical values in the - // encoding. - pub fn all_values(&mut self) -> Vec> { - let mut out: Vec> = Vec::with_capacity(self.total as usize); - - // build reverse mapping. - let mut idx_value = BTreeMap::new(); - for (k, v) in &self.entry_index { - idx_value.insert(v, k); - } - assert_eq!(idx_value.len(), self.entry_index.len()); + // all_encoded_values materialises a vector of all encoded values for the + // column. + pub fn all_encoded_values(&self) -> Vec { + let mut out: Vec = Vec::with_capacity(self.total as usize); for (idx, rl) in &self.run_lengths { - // TODO(edd): fix unwrap - we know that the value exists in map... - let v = idx_value.get(&idx).unwrap().as_ref(); - out.extend(iter::repeat(v).take(*rl as usize)); + out.extend(iter::repeat(*idx as i64).take(*rl as usize)); } out } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index c058df01f1..f8c500593e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -228,7 +228,7 @@ impl Segment { group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(String, Option)>> { + ) -> BTreeMap, Vec<(&String, &AggregateType, Option)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. 
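The comment above is the whole hash strategy in miniature: one pass over the filtered rows, using the row's tuple of encoded group-column values as the map key and folding the row into that key's running aggregates. A stripped-down sketch, assuming two integer-encoded key columns and only a count aggregate:

use std::collections::HashMap;

// One-pass hash grouping over (env, role) pairs of encoded values.
fn hash_group_count(rows: &[(i64, i64)]) -> HashMap<Vec<i64>, u64> {
    let mut table: HashMap<Vec<i64>, u64> = HashMap::new();
    for &(env, role) in rows {
        let key = vec![env, role]; // the row's encoded group key
        *table.entry(key).or_insert(0) += 1; // running aggregate
    }
    table
}

fn main() {
    let rows = vec![(0, 1), (0, 1), (2, 1)];
    let table = hash_group_count(&rows);
    assert_eq!(table[&vec![0, 1]], 2);
    assert_eq!(table[&vec![2, 1]], 1);
}

The sketch uses the `entry` API for brevity; patch 42 above deliberately avoids it in the hot loop, because a `contains_key` check on a reused key buffer proved cheaper than allocating an owned key for every row.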
@@ -242,6 +242,10 @@ impl Segment { assert_ne!(group_columns[group_columns.len() - 1], "time"); } + // TODO(edd): Perf - if there is no predicate and we want entire segment + // then it will be a lot faster to not build filtered_row_ids and just + // get all encoded values for each grouping column... + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -263,7 +267,12 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); + let encoded_values = if filtered_row_ids_vec.len() == self.meta.rows { + column.all_encoded_values() + } else { + column.encoded_values(&filtered_row_ids_vec) + }; + assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() @@ -325,10 +334,10 @@ impl Segment { .collect::>(); // hashMap is about 20% faster than BTreeMap in this case - let mut hash_table: HashMap< + let mut hash_table: BTreeMap< Vec, Vec<(&String, &AggregateType, Option)>, - > = HashMap::new(); + > = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -406,8 +415,10 @@ impl Segment { } processed_rows += 1; } + // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() + // hash_table } pub fn aggregate_by_group_using_sort( @@ -451,7 +462,11 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); + let encoded_values = if filtered_row_ids_vec.len() == self.meta.rows { + column.all_encoded_values() + } else { + column.encoded_values(&filtered_row_ids_vec) + }; assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() @@ -557,6 +572,10 @@ impl Segment { assert_ne!(group_columns[group_columns.len() - 1], "time"); } + // TODO(edd): Perf - if there is no predicate and we want entire segment + // then it will be a lot faster to not build filtered_row_ids and just + // get all encoded values for each grouping column... 
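The primitive that TODO would lean on, `all_encoded_values` (added earlier in this patch), is pure run-length expansion for a dictionary-RLE column: each stored `(dictionary index, run length)` pair becomes that many consecutive encoded ids, with no per-row-id lookups. A minimal standalone sketch of the expansion:

use std::iter;

// Expand (dictionary index, run length) pairs into one encoded id per row,
// mirroring DictionaryRLE::all_encoded_values above.
fn expand_rle(run_lengths: &[(usize, u64)]) -> Vec<i64> {
    let total: u64 = run_lengths.iter().map(|(_, rl)| rl).sum();
    let mut out = Vec::with_capacity(total as usize);
    for (idx, rl) in run_lengths {
        out.extend(iter::repeat(*idx as i64).take(*rl as usize));
    }
    out
}

fn main() {
    let rle: Vec<(usize, u64)> = vec![(0, 2), (1, 3), (0, 1)];
    assert_eq!(expand_rle(&rle), vec![0, 0, 1, 1, 1, 0]);
}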
+ // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -577,7 +596,11 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); + let encoded_values = if filtered_row_ids_vec.len() == self.meta.rows { + column.all_encoded_values() + } else { + column.encoded_values(&filtered_row_ids_vec) + }; assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() @@ -709,6 +732,7 @@ impl Segment { aggregates: group_key_aggregates, }); + // println!("groups: {:?}", results.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, results); // results vec![] From d3fd2c36290cc9257ace84c1c633d6d390743b59 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 4 Sep 2020 15:58:04 +0100 Subject: [PATCH 44/73] feat: wip group by * --- delorean_mem_qe/src/bin/main.rs | 84 ++++++++++++++++++++++++++++----- delorean_mem_qe/src/segment.rs | 33 +++++++------ 2 files changed, 92 insertions(+), 25 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 5d021fb25b..d4b810467d 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -63,15 +63,16 @@ fn main() { ); let store = Arc::new(store); - // time_select_with_pred(&store); - // time_datafusion_select_with_pred(store.clone()); - // time_first_host(&store); - // time_sum_range(&store); - // time_count_range(&store); - // time_group_single_with_pred(&store); + time_select_with_pred(&store); + time_datafusion_select_with_pred(store.clone()); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); time_group_by_multi_agg_count(&store); time_group_by_multi_agg_sorted_count(&store); time_window_agg_count(&store); + // time_group_by_different_columns(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -94,7 +95,7 @@ fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> } fn build_arrow_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { - let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let r = File::open(Path::new(path)).unwrap(); let file_size = fs::metadata(&path).expect("read metadata").len(); println!( "Reading {} ({}) bytes of Arrow from {:?}....", @@ -120,7 +121,7 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - // if i < 363 { + // if i < 364 { // i += 1; // continue; // } @@ -467,9 +468,9 @@ fn time_group_single_with_pred(store: &Store) { fn time_group_by_multi_agg_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, - GroupingStrategy::HashGroupConcurrent, + // GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - GroupingStrategy::SortGroupConcurrent, + // GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { @@ -518,7 +519,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 10; + let repeat = 1; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -586,3 
+587,64 @@ fn time_window_agg_count(store: &Store) {
         );
     }
 }
+
+// This is for a performance experiment where I wanted to show the performance
+// change as more columns are grouped on.
+//
+// This only shows good performance when the input file is ordered on all of the
+// columns below.
+fn time_group_by_different_columns(store: &Store) {
+    let strats = vec![
+        GroupingStrategy::HashGroup,
+        GroupingStrategy::HashGroupConcurrent,
+        GroupingStrategy::SortGroup,
+        GroupingStrategy::SortGroupConcurrent,
+    ];
+
+    let cols = vec![
+        "status".to_string(),
+        "method".to_string(),
+        "url".to_string(),
+        "env".to_string(),
+        "handler".to_string(),
+        "role".to_string(),
+        "user_agent".to_string(),
+        "path".to_string(),
+        "nodename".to_string(),
+        "host".to_string(),
+        "hostname".to_string(),
+    ];
+
+    for strat in &strats {
+        let repeat = 10;
+        let mut total_time: std::time::Duration = std::time::Duration::new(0, 0);
+        let mut total_max = 0;
+        let segments = store.segments();
+
+        for i in 1..=cols.len() {
+            for _ in 0..repeat {
+                let now = std::time::Instant::now();
+
+                let groups = segments.read_group_eq(
+                    (1589000000000001, 1590044410000000),
+                    &[],
+                    cols[0..i].to_vec(),
+                    vec![("counter".to_string(), AggregateType::Count)],
+                    0,
+                    strat,
+                );
+
+                total_time += now.elapsed();
+                total_max += groups.len();
+            }
+            println!(
+                "time_group_by_different_columns{:?} cols: {:?} ran {:?} in {:?} {:?}",
+                strat,
+                i,
+                repeat,
+                total_time,
+                total_time / repeat,
+            );
+        }
+    }
+}
diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs
index f8c500593e..c06e9d6bca 100644
--- a/delorean_mem_qe/src/segment.rs
+++ b/delorean_mem_qe/src/segment.rs
@@ -221,14 +221,14 @@ impl Segment {
         true
     }
 
-    pub fn aggregate_by_group_with_hash(
+    pub fn aggregate_by_group_with_hash<'a>(
         &self,
         time_range: (i64, i64),
         predicates: &[(&str, Option<&column::Scalar>)],
         group_columns: &[String],
-        aggregates: &[(String, AggregateType)],
+        aggregates: &'a [(String, AggregateType)],
         window: i64,
-    ) -> BTreeMap, Vec<(&String, &AggregateType, Option)>> {
+    ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option)>> {
         // Build a hash table - essentially, scan columns for matching row ids,
         // emitting the encoded value for each column and track those value
         // combinations in a hashmap with running aggregates.
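The `<'a>` threading in the hunk above exists so the returned map can borrow each aggregate's name and type straight from the `aggregates` slice rather than cloning `String`s per group. A minimal sketch of the pattern, with hypothetical names:

use std::collections::BTreeMap;

// `'a` ties the borrowed keys in the result to the `names` slice, so the
// output can be built without cloning any String.
fn index_by_name<'a>(names: &'a [String]) -> BTreeMap<&'a String, usize> {
    let mut out = BTreeMap::new();
    for (i, name) in names.iter().enumerate() {
        out.insert(name, i);
    }
    out
}

fn main() {
    let names = vec!["counter".to_string(), "requests".to_string()];
    let index = index_by_name(&names);
    assert_eq!(index.get(&"counter".to_string()), Some(&0));
}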
let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { + let column_name: &'a String = column_name; + if let Some(column) = self.column(&column_name) { let decoded_values = column.values(&filtered_row_ids_vec); assert_eq!( @@ -336,7 +338,7 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&String, &AggregateType, Option)>, + Vec<(&'a String, &'a AggregateType, Option)>, > = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = @@ -368,8 +370,11 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&String, &AggregateType, Option)> = - Vec::with_capacity(aggregates.len()); + let mut agg_results: Vec<( + &'a String, + &'a AggregateType, + Option, + )> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option } @@ -417,8 +422,8 @@ impl Segment { } // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - BTreeMap::new() - // hash_table + // BTreeMap::new() + hash_table } pub fn aggregate_by_group_using_sort( @@ -1097,14 +1102,14 @@ impl<'a> Segments<'a> { } if concurrent { - let group_columns_arc = std::sync::Arc::new(group_columns); - let aggregates_arc = std::sync::Arc::new(aggregates); + // let group_columns_arc = std::sync::Arc::new(group_columns); + // let aggregates_arc = std::sync::Arc::new(aggregates); for chunked_segments in self.segments.chunks(THREADS) { crossbeam::scope(|scope| { for segment in chunked_segments { - let group_columns = group_columns_arc.clone(); - let aggregates = aggregates_arc.clone(); + // let group_columns = group_columns_arc.clone(); + // let aggregates = aggregates_arc.clone(); scope.spawn(move |_| { let now = std::time::Instant::now(); @@ -1132,8 +1137,8 @@ impl<'a> Segments<'a> { segment.aggregate_by_group_with_hash( time_range, predicates, - &group_columns_arc.clone(), - &aggregates_arc.clone(), + &group_columns, + &aggregates, window, ); log::info!( From 551f6c3c78049aa147c33e92dfed60c7c5e6005f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 7 Sep 2020 11:20:08 +0100 Subject: [PATCH 45/73] refactor: cleanup --- delorean_mem_qe/src/column.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index b3df18edca..89345800a5 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -54,17 +54,17 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { match self { Self::Float(v) => { if let Self::Float(other) = _rhs { - return Self::Float(v + other); + Self::Float(v + other) } else { panic!("invalid"); - }; + } } Self::Integer(v) => { if let Self::Integer(other) = _rhs { - return Self::Integer(v + other); + Self::Integer(v + other) } else { panic!("invalid"); - }; + } } Self::String(_) => { unreachable!("not possible to add strings"); @@ -140,17 +140,17 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { match self { Self::Count(c) => { if let Self::Count(other) = _rhs { - return Self::Count(c + other); + Self::Count(c + other) } else { panic!("invalid"); - }; + } } Self::Sum(s) => { if let Self::Sum(other) = _rhs { - return Self::Sum(s + other); + Self::Sum(s + other) } else { panic!("invalid"); - }; + } } } } @@ -360,7 +360,7 @@ 
impl Column { /// Materialise the decoded value matching the provided logical /// row id. - pub fn value(&self, row_id: usize) -> Option { + pub fn value(&self, row_id: usize) -> Option> { match self { Column::String(c) => { if row_id >= self.num_rows() { From 9cb18fd94216c97d930679c4582578d03571f0da Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 7 Sep 2020 17:42:50 +0100 Subject: [PATCH 46/73] refactor: address lifetimes --- delorean_mem_qe/src/adapter.rs | 2 +- delorean_mem_qe/src/column.rs | 92 +++++++++++++++++----------------- delorean_mem_qe/src/lib.rs | 3 +- delorean_mem_qe/src/segment.rs | 16 +++--- delorean_mem_qe/src/sorter.rs | 12 ++--- 5 files changed, 63 insertions(+), 62 deletions(-) diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs index 0fcb5cb34a..e7fed4a721 100644 --- a/delorean_mem_qe/src/adapter.rs +++ b/delorean_mem_qe/src/adapter.rs @@ -181,7 +181,7 @@ impl LogicalPlanNode for SegmentScan { } /// Write a single line human readable string to `f` for use in explain plan - fn format_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn format_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "SegmentScan: {:?} predicate {:?}", diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 89345800a5..7d7618d92b 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -162,7 +162,7 @@ pub trait AggregatableByRange { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate; + ) -> Aggregate<'_>; } /// A Vector is a materialised vector of values from a column. pub enum Vector<'a> { @@ -177,7 +177,7 @@ impl<'a> Vector<'a> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'a> { match agg_type { AggregateType::Count => { Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) @@ -186,7 +186,7 @@ impl<'a> Vector<'a> { } } - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { match self { Vector::String(_) => { panic!("can't sum strings...."); @@ -282,7 +282,7 @@ impl AggregatableByRange for &Vector<'_> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'_> { Vector::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } @@ -389,7 +389,7 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids. - pub fn values(&self, row_ids: &[usize]) -> Vector { + pub fn values(&self, row_ids: &[usize]) -> Vector<'_> { match self { Column::String(c) => { if row_ids.is_empty() { @@ -424,7 +424,7 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids within the bitmap - pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { match self { Column::String(c) => { if row_ids.is_empty() { @@ -467,7 +467,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. - pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -506,7 +506,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. 
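The signature changes that follow are typical of this patch: once `#![deny(rust_2018_idioms)]` lands in lib.rs (later in the same patch), any return type that silently borrows from `self` must spell the elided lifetime out, as in `Vector<'_>`. A tiny self-contained illustration of the lint:

#![deny(rust_2018_idioms)]

// A borrowing type, standing in for column::Vector<'a>.
struct View<'a> {
    values: &'a [i64],
}

struct Store {
    values: Vec<i64>,
}

impl Store {
    // Writing `-> View` here would trip elided_lifetimes_in_paths, part of
    // the rust_2018_idioms group; `View<'_>` makes the borrow visible.
    fn view(&self) -> View<'_> {
        View {
            values: &self.values,
        }
    }
}

fn main() {
    let store = Store { values: vec![1, 2, 3] };
    assert_eq!(store.view().values.len(), 3);
}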
- pub fn encoded_values(&self, row_ids: &[usize]) -> Vector { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vector<'_> { match self { Column::String(c) => { if row_ids.is_empty() { @@ -538,7 +538,7 @@ impl Column { } /// Materialise all of the encoded values. - pub fn all_encoded_values(&self) -> Vector { + pub fn all_encoded_values(&self) -> Vector<'_> { match self { Column::String(c) => { let now = std::time::Instant::now(); @@ -574,7 +574,7 @@ impl Column { } /// materialise rows for each row_id - pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -596,33 +596,33 @@ impl Column { } } - /// materialise all rows including and after row_id - pub fn scan_from(&self, _row_id: usize) -> Option { - unimplemented!("todo"); - // if row_id >= self.num_rows() { - // println!( - // "asking for {:?} but only got {:?} rows", - // row_id, - // self.num_rows() - // ); - // return None; - // } + // /// materialise all rows including and after row_id + // pub fn scan_from(&self, _row_id: usize) -> Option { + // unimplemented!("todo"); + // // if row_id >= self.num_rows() { + // // println!( + // // "asking for {:?} but only got {:?} rows", + // // row_id, + // // self.num_rows() + // // ); + // // return None; + // // } - // println!( - // "asking for {:?} with a column having {:?} rows", - // row_id, - // self.num_rows() - // ); - // match self { - // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), - // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), - // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), - // } - } + // // println!( + // // "asking for {:?} with a column having {:?} rows", + // // row_id, + // // self.num_rows() + // // ); + // // match self { + // // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), + // // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), + // // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + // // } + // } /// Given the provided row_id scans the column until a non-null value found /// or the column is exhausted. - pub fn scan_from_until_some(&self, row_id: usize) -> Option { + pub fn scan_from_until_some(&self, row_id: usize) -> Option> { match self { Column::String(c) => { if row_id >= self.num_rows() { @@ -655,7 +655,7 @@ impl Column { } } - pub fn maybe_contains(&self, value: Option<&Scalar>) -> bool { + pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { match self { Column::String(c) => match value { Some(scalar) => { @@ -685,7 +685,7 @@ impl Column { } /// returns true if the column cannot contain - pub fn max_less_than(&self, value: Option<&Scalar>) -> bool { + pub fn max_less_than(&self, value: Option<&Scalar<'_>>) -> bool { match self { Column::String(c) => match value { Some(scalar) => { @@ -714,7 +714,7 @@ impl Column { } } - pub fn min_greater_than(&self, value: Option<&Scalar>) -> bool { + pub fn min_greater_than(&self, value: Option<&Scalar<'_>>) -> bool { match self { Column::String(c) => match value { Some(scalar) => { @@ -745,7 +745,7 @@ impl Column { /// Returns the minimum value contained within this column. // FIXME(edd): Support NULL integers and floats - pub fn min(&self) -> Option { + pub fn min(&self) -> Option> { match self { Column::String(c) => { if let Some(min) = c.meta.range().0 { @@ -760,7 +760,7 @@ impl Column { /// Returns the maximum value contained within this column. 
// FIXME(edd): Support NULL integers and floats - pub fn max(&self) -> Option { + pub fn max(&self) -> Option> { match self { Column::String(c) => { if let Some(max) = c.meta.range().1 { @@ -773,7 +773,7 @@ impl Column { } } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option> { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), @@ -786,7 +786,7 @@ impl Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'_> { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match agg_type { @@ -811,21 +811,21 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - pub fn row_ids_eq(&self, value: Option<&Scalar>) -> Option { + pub fn row_ids_eq(&self, value: Option<&Scalar<'_>>) -> Option { if !self.maybe_contains(value) { return None; } self.row_ids(value, std::cmp::Ordering::Equal) } - pub fn row_ids_gt(&self, value: Option<&Scalar>) -> Option { + pub fn row_ids_gt(&self, value: Option<&Scalar<'_>>) -> Option { if self.max_less_than(value) { return None; } self.row_ids(value, std::cmp::Ordering::Greater) } - pub fn row_ids_lt(&self, value: Option<&Scalar>) -> Option { + pub fn row_ids_lt(&self, value: Option<&Scalar<'_>>) -> Option { if self.min_greater_than(value) { return None; } @@ -838,7 +838,7 @@ impl Column { // or // // WHERE counter >= 102.2 AND counter < 2929.32 - pub fn row_ids_gte_lt(&self, low: &Scalar, high: &Scalar) -> Option { + pub fn row_ids_gte_lt(&self, low: &Scalar<'_>, high: &Scalar<'_>) -> Option { match self { Column::String(_c) => { unimplemented!("not implemented yet"); @@ -895,7 +895,7 @@ impl Column { // TODO(edd) shouldn't let roaring stuff leak out... fn row_ids( &self, - value: Option<&Scalar>, + value: Option<&Scalar<'_>>, order: std::cmp::Ordering, ) -> Option { match self { @@ -938,7 +938,7 @@ impl AggregatableByRange for &Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'_> { Column::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index c8617e1cf0..4176d3cdd8 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,3 +1,4 @@ +#![deny(rust_2018_idioms)] pub mod adapter; pub mod column; pub mod encoding; @@ -30,7 +31,7 @@ impl Store { self.segments.len() } - pub fn segments(&self) -> Segments { + pub fn segments(&self) -> Segments<'_> { Segments::new(self.segments.iter().collect::>()) } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index c06e9d6bca..207951823e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -145,12 +145,12 @@ impl Segment { column_sizes } - pub fn scan_column_from(&self, column_name: &str, row_id: usize) -> Option { - if let Some(i) = self.column_names().iter().position(|c| c == column_name) { - return self.columns[i].scan_from(row_id); - } - None - } + // pub fn scan_column_from(&self, column_name: &str, row_id: usize) -> Option { + // if let Some(i) = self.column_names().iter().position(|c| c == column_name) { + // return self.columns[i].scan_from(row_id); + // } + // None + // } // Materialise all rows for each desired column. 
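// `AggregatableByRange` (implemented for `&Column` above, and for `&Vector`
// earlier) exists so aggregation code can run over either an encoded column
// or an already-materialised vector. A sketch of a consumer, assuming the
// trait as defined in this patch (the helper itself is hypothetical):
//
//     fn count_per_window(input: impl AggregatableByRange, window: usize, rows: usize) -> Vec<u64> {
//         let mut counts = Vec::new();
//         let mut from = 0;
//         while from < rows {
//             let to = std::cmp::min(from + window, rows);
//             // the input only needs to support aggregation over a row-id range
//             if let Aggregate::Count(n) =
//                 input.aggregate_by_id_range(&AggregateType::Count, from, to)
//             {
//                 counts.push(n);
//             }
//             from = to;
//         }
//         counts
//     }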
// @@ -1108,8 +1108,8 @@ impl<'a> Segments<'a> { for chunked_segments in self.segments.chunks(THREADS) { crossbeam::scope(|scope| { for segment in chunked_segments { - // let group_columns = group_columns_arc.clone(); - // let aggregates = aggregates_arc.clone(); + let group_columns = &group_columns; + let aggregates = &aggregates; scope.spawn(move |_| { let now = std::time::Instant::now(); diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index c8b01a3432..fb592b87ad 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -43,7 +43,7 @@ const SORTED_CHECK_SIZE: usize = 1000; /// /// All chosen columns will be sorted in ascending order; the sort is *not* /// stable. -pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Error> { +pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(), Error> { if vectors.is_empty() || sort_by.is_empty() { return Ok(()); } @@ -87,7 +87,7 @@ pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Err Ok(()) } -fn quicksort_by(vectors: &mut [column::Vector], range: Range, sort_by: &[usize]) { +fn quicksort_by(vectors: &mut [column::Vector<'_>], range: Range, sort_by: &[usize]) { if range.start >= range.end { return; } @@ -97,7 +97,7 @@ fn quicksort_by(vectors: &mut [column::Vector], range: Range, sort_by: &[ quicksort_by(vectors, pivot + 1..range.end, sort_by); } -fn partition(vectors: &mut [column::Vector], range: &Range, sort_by: &[usize]) -> usize { +fn partition(vectors: &mut [column::Vector<'_>], range: &Range, sort_by: &[usize]) -> usize { let pivot = (range.start + range.end) / 2; let (lo, hi) = (range.start, range.end); if cmp(vectors, pivot as usize, lo as usize, sort_by) == Ordering::Less { @@ -133,7 +133,7 @@ fn partition(vectors: &mut [column::Vector], range: &Range, sort_by: &[us } } -fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ordering { +fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering { for &idx in sort_by { match &vectors[idx] { column::Vector::String(p) => { @@ -157,7 +157,7 @@ fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ord } #[allow(dead_code)] -fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool { +fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usize]) -> bool { 'row_wise: for i in 1..len { for &idx in sort_by { match &vectors[idx] { @@ -191,7 +191,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) } // Swap the same pair of elements in each packer column -fn swap(vectors: &mut [column::Vector], a: usize, b: usize) { +fn swap(vectors: &mut [column::Vector<'_>], a: usize, b: usize) { for p in vectors { p.swap(a, b); } From b0e0676f61e5f4ebe8a554bf4d9f52ee2860b4ad Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 7 Sep 2020 17:47:32 +0100 Subject: [PATCH 47/73] refactor: address lifetimes --- delorean_mem_qe/src/segment.rs | 71 ++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 207951823e..77936fab85 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -160,8 +160,8 @@ impl Segment { &self, row_ids: &croaring::Bitmap, columns: &[String], - ) -> BTreeMap { - let mut rows: BTreeMap = BTreeMap::new(); + ) -> BTreeMap> { + let mut rows: BTreeMap> = BTreeMap::new(); if 
row_ids.is_empty() { // nothing to return return rows; @@ -224,11 +224,12 @@ impl Segment { pub fn aggregate_by_group_with_hash<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'a>>)], group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option)>> { + ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option>)>> + { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -338,10 +339,10 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&'a String, &'a AggregateType, Option)>, + Vec<(&'a String, &'a AggregateType, Option>)>, > = BTreeMap::new(); - let mut aggregate_row: Vec<(&str, Option)> = + let mut aggregate_row: Vec<(&str, Option>)> = std::iter::repeat_with(|| ("", None)) .take(aggregate_itrs.len()) .collect(); @@ -373,7 +374,7 @@ impl Segment { let mut agg_results: Vec<( &'a String, &'a AggregateType, - Option, + Option>, )> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option @@ -429,11 +430,11 @@ impl Segment { pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> Vec { + ) -> Vec> { log::debug!("aggregate_by_group_with_sort_unsorted called"); if window > 0 { @@ -563,7 +564,7 @@ impl Segment { pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -640,7 +641,7 @@ impl Segment { // available and appropriately sorted this method will build a result set of // aggregates in a streaming way. pub fn stream_grouped_aggregates<'a>( - mut group_itrs: Vec>, + mut group_itrs: Vec>, aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, total_rows: usize, window: i64, @@ -743,7 +744,11 @@ impl Segment { vec![] } - pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { + pub fn sum_column( + &self, + name: &str, + row_ids: &mut croaring::Bitmap, + ) -> Option> { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); } @@ -765,7 +770,7 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range @@ -783,7 +788,7 @@ impl Segment { fn filter_by_predicates_eq_time( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option<&column::Scalar>)>, + predicates: Vec<(&str, Option<&column::Scalar<'_>>)>, ) -> Option { // Get all row_ids matching the time range: // @@ -821,7 +826,7 @@ impl Segment { // meta row_ids bitmap. 
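// The hash-grouping strategy above reduces to: form a composite key from each
// row's encoded group-column values and keep one running aggregate per
// distinct key. Its core, as a hypothetical standalone helper (the real code
// carries a `Vec` of named `Aggregate`s per key instead of a plain sum):
//
//     fn group_sum(group_keys: &[Vec<i64>], values: &[f64]) -> std::collections::BTreeMap<Vec<i64>, f64> {
//         let mut table = std::collections::BTreeMap::new();
//         for (key, v) in group_keys.iter().zip(values) {
//             // one entry per distinct combination of encoded group values
//             *table.entry(key.clone()).or_insert(0.0) += *v;
//         }
//         table
//     }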
fn filter_by_predicates_eq_no_time( &self, - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], ) -> Option { if predicates.is_empty() { // In this case there are no predicates provided and we have no time @@ -865,10 +870,10 @@ impl Segment { pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_column: &String, aggregates: &Vec<(String, column::AggregateType)>, - ) -> BTreeMap> { + ) -> BTreeMap)>> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -884,7 +889,7 @@ impl Segment { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { // First calculate all of the aggregates for this grouped value - let mut aggs: Vec<((String, AggregateType), column::Aggregate)> = + let mut aggs: Vec<((String, AggregateType), column::Aggregate<'_>)> = Vec::with_capacity(aggregates.len()); for (col_name, agg) in aggregates { @@ -1004,15 +1009,15 @@ impl<'a> Segments<'a> { pub fn read_filter_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], select_columns: Vec, - ) -> BTreeMap { + ) -> BTreeMap> { let (min, max) = time_range; if max <= min { panic!("max <= min"); } - let mut columns: BTreeMap = BTreeMap::new(); + let mut columns: BTreeMap> = BTreeMap::new(); for segment in &self.segments { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range @@ -1040,12 +1045,12 @@ impl<'a> Segments<'a> { pub fn read_group_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, strategy: &GroupingStrategy, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); @@ -1090,12 +1095,12 @@ impl<'a> Segments<'a> { fn read_group_eq_hash( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1176,12 +1181,12 @@ impl<'a> Segments<'a> { fn read_group_eq_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1313,12 +1318,12 @@ impl<'a> Segments<'a> { } /// Returns the minimum value for a column in a set of segments. 
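// `filter_by_predicates_eq_no_time` above is the usual intersection pattern:
// each `column = value` predicate yields a row-id bitmap and the bitmaps are
// AND-ed together, bailing out as soon as the intersection becomes empty.
// The same shape in isolation (hypothetical helper, assuming croaring 0.4's
// `and_inplace`/`is_empty`):
//
//     fn intersect_all(mut sets: Vec<croaring::Bitmap>) -> Option<croaring::Bitmap> {
//         let mut result = sets.pop()?;
//         for s in &sets {
//             result.and_inplace(s);
//             if result.is_empty() {
//                 return None; // some predicate matches no rows at all
//             }
//         }
//         Some(result)
//     }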
-    pub fn column_min(&self, column_name: &str) -> Option<Scalar> {
+    pub fn column_min(&self, column_name: &str) -> Option<Scalar<'_>> {
         if self.segments.is_empty() {
             return None;
         }

-        let mut min_min: Option<Scalar> = None;
+        let mut min_min: Option<Scalar<'_>> = None;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let min = segment.columns[i].min();
@@ -1334,12 +1339,12 @@ impl<'a> Segments<'a> {
     }

     /// Returns the maximum value for a column in a set of segments.
-    pub fn column_max(&self, column_name: &str) -> Option<Scalar> {
+    pub fn column_max(&self, column_name: &str) -> Option<Scalar<'_>> {
         if self.segments.is_empty() {
             return None;
         }

-        let mut max_max: Option<Scalar> = None;
+        let mut max_max: Option<Scalar<'_>> = None;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let max = segment.columns[i].max();
@@ -1362,7 +1367,7 @@ impl<'a> Segments<'a> {
     /// If the time column has multiple max time values then the result is arbitrary.
     ///
     /// TODO(edd): could return NULL value..
-    pub fn first(&self, column_name: &str) -> Option<(i64, Option<Scalar>, usize)> {
+    pub fn first(&self, column_name: &str) -> Option<(i64, Option<Scalar<'_>>, usize)> {
         // First let's find the segment with the earliest time range.
         // notice we order a < b on max time range.
         let segment = self
@@ -1394,7 +1399,7 @@ impl<'a> Segments<'a> {
     /// If the time column has multiple max time values then the result is arbitrary.
     ///
     /// TODO(edd): could return NULL value..
-    pub fn last(&self, column_name: &str) -> Option<(i64, Option<Scalar<'_>>, usize)> {
+    pub fn last(&self, column_name: &str) -> Option<(i64, Option<Scalar<'_>>, usize)> {
         // First let's find the segment with the latest time range.
         // notice we order a > b on max time range.
         let segment = self

From e511c5fc4a59a2b0abc13ce83e36b22631245235 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Mon, 7 Sep 2020 18:39:57 +0100
Subject: [PATCH 48/73] refactor: stuck

---
 delorean_mem_qe/src/adapter.rs |  2 +-
 delorean_mem_qe/src/lib.rs     |  2 ++
 delorean_mem_qe/src/segment.rs | 12 ++++++------
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs
index e7fed4a721..a0fdab6af6 100644
--- a/delorean_mem_qe/src/adapter.rs
+++ b/delorean_mem_qe/src/adapter.rs
@@ -149,7 +149,7 @@ struct SegmentScan {
     predicate: Expr,
 }

-impl SegmentScan {
+impl<'a> SegmentScan {
     fn new(store: Arc<Store>, predicate: Expr) -> Self {
         let schema = store.schema().clone();

diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs
index 4176d3cdd8..29bb987eac 100644
--- a/delorean_mem_qe/src/lib.rs
+++ b/delorean_mem_qe/src/lib.rs
@@ -32,6 +32,8 @@ impl Store {
     }

     pub fn segments(&self) -> Segments<'_> {
+        // let iter: std::slice::Iter<'a, Segment> = self.segments.iter();
+        // let segments = iter.collect::<Vec<&Segment>>();
         Segments::new(self.segments.iter().collect::<Vec<&Segment>>())
     }

diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs
index 77936fab85..26e69a181b 100644
--- a/delorean_mem_qe/src/segment.rs
+++ b/delorean_mem_qe/src/segment.rs
@@ -1045,12 +1045,12 @@ impl<'a> Segments<'a> {
     pub fn read_group_eq(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<&column::Scalar<'_>>)],
+        predicates: &[(&str, Option<&column::Scalar<'a>>)],
         group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         strategy: &GroupingStrategy,
-    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> {
+    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
        let (min, max) = time_range;
        if max <= min {
            panic!("max <= min");
        }
@@ -1095,12 +1095,12 @@ impl<'a> Segments<'a> {
     fn read_group_eq_hash(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<&column::Scalar<'_>>)],
+        predicates: &[(&str, Option<&column::Scalar<'a>>)],
         mut group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         concurrent: bool,
-    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> {
+    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
         if window > 0 {
             // add time column to the group key
             group_columns.push("time".to_string());
@@ -1181,12 +1181,12 @@
     fn read_group_eq_sort(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<&column::Scalar<'_>>)],
+        predicates: &[(&str, Option<&column::Scalar<'a>>)],
         mut group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         concurrent: bool,
-    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> {
+    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
         if window > 0 {
             // add time column to the group key
             group_columns.push("time".to_string());

From 3dd41cb71ddb2b401743dc17cc4f7d55284c515b Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Tue, 8 Sep 2020 15:44:05 +0100
Subject: [PATCH 49/73] refactor: tidy encoding

---
 delorean_mem_qe/src/encoding.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs
index 4b057cfc96..32af51f4a7 100644
--- a/delorean_mem_qe/src/encoding.rs
+++ b/delorean_mem_qe/src/encoding.rs
@@ -18,8 +18,9 @@ impl PlainFixedOption {
 // No compression
 pub struct PlainFixed<T> {
     values: Vec<T>,
-    buf: Vec<T>,
-    total_order: bool, // if true the column is totally ordered ascending.
+    // total_order can be used as a hint to stop scanning the column early when
+    // applying a comparison predicate to the column.
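// A sketch of the early exit this hint enables (hypothetical code, not part
// of this patch): once values are known to be sorted ascending, a
// `>= from, < to` scan can stop at the first value that reaches `to`:
//
//     for (i, v) in self.values.iter().enumerate() {
//         if self.total_order && *v >= to {
//             break; // no later row can fall inside [from, to)
//         }
//         if *v >= from && *v < to {
//             bm.add(i as u32);
//         }
//     }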
+ total_order: bool, } impl PlainFixed @@ -245,7 +246,7 @@ impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { Self { values: v.to_vec(), - buf: Vec::with_capacity(v.len()), + // buf: Vec::with_capacity(v.len()), total_order: false, } } @@ -255,7 +256,7 @@ impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { Self { values: v.to_vec(), - buf: Vec::with_capacity(v.len()), + // buf: Vec::with_capacity(v.len()), total_order: false, } } From 9a3e0d24a3ddca99288050797c43be8c84d40e0f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 8 Sep 2020 18:51:16 +0100 Subject: [PATCH 50/73] refactor: cruft --- delorean_mem_qe/src/column.rs | 73 +-------------------------------- delorean_mem_qe/src/encoding.rs | 9 ---- delorean_mem_qe/src/segment.rs | 12 +++--- 3 files changed, 6 insertions(+), 88 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 7d7618d92b..45070387c7 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -595,65 +595,7 @@ impl Column { Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), } } - - // /// materialise all rows including and after row_id - // pub fn scan_from(&self, _row_id: usize) -> Option { - // unimplemented!("todo"); - // // if row_id >= self.num_rows() { - // // println!( - // // "asking for {:?} but only got {:?} rows", - // // row_id, - // // self.num_rows() - // // ); - // // return None; - // // } - - // // println!( - // // "asking for {:?} with a column having {:?} rows", - // // row_id, - // // self.num_rows() - // // ); - // // match self { - // // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), - // // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), - // // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), - // // } - // } - - /// Given the provided row_id scans the column until a non-null value found - /// or the column is exhausted. - pub fn scan_from_until_some(&self, row_id: usize) -> Option> { - match self { - Column::String(c) => { - if row_id >= self.num_rows() { - return None; - } - - match c.scan_from_until_some(row_id) { - Some(v) => Some(Scalar::String(v)), - None => None, - } - } - Column::Float(c) => { - if row_id >= self.num_rows() { - return None; - } - match c.scan_from_until_some(row_id) { - Some(v) => Some(Scalar::Float(v)), - None => None, - } - } - Column::Integer(c) => { - if row_id >= self.num_rows() { - return None; - } - match c.scan_from_until_some(row_id) { - Some(v) => Some(Scalar::Integer(v)), - None => None, - } - } - } - } +} pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { match self { @@ -1017,11 +959,6 @@ impl String { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, _row_id: usize) -> Option<&std::string::String> { - unreachable!("don't need this"); - // self.data.scan_from_until_some(row_id) - } - // TODO(edd) shouldn't let roaring stuff leak out... pub fn group_row_ids(&self) -> &std::collections::BTreeMap { self.data.group_row_ids() @@ -1065,10 +1002,6 @@ impl Float { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, row_id: usize) -> Option { - self.data.scan_from_until_some(row_id) - } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { self.data.sum_by_ids(row_ids) } @@ -1138,10 +1071,6 @@ impl Integer { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, row_id: usize) -> Option { - self.data.scan_from_until_some(row_id) - } - /// Find the first logical row that contains this value. 
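// The `From<&[i64]>` constructor below derives the column's (min, max) range
// in a single pass at build time, which is what keeps the later pruning
// checks O(1). That pass in isolation (hypothetical helper):
//
//     fn min_max(values: &[i64]) -> Option<(i64, i64)> {
//         let first = *values.first()?;
//         let mut range = (first, first);
//         for &v in &values[1..] {
//             range.0 = range.0.min(v);
//             range.1 = range.1.max(v);
//         }
//         Some(range)
//     }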
pub fn row_id_eq_value(&self, v: i64) -> Option { if !self.meta.maybe_contains_value(v) { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 32af51f4a7..1bed37f027 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -75,15 +75,6 @@ where self.values.clone() } - // TODO(edd): fix this when added NULL support - pub fn scan_from_until_some(&self, _row_id: usize) -> Option { - unreachable!("to remove"); - // for v in self.values.iter().skip(row_id) { - // return Some(*v); - // } - // None - } - pub fn scan_from(&self, row_id: usize) -> &[T] { &self.values[row_id..] } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 26e69a181b..47e673b28f 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -944,24 +944,22 @@ pub struct SegmentMetaData { column_names: Vec, time_range: (i64, i64), - // row_ids is a bitmap containing all row ids. - row_ids: croaring::Bitmap, + // row_ids: croaring::Bitmap, // TODO column sort order } impl SegmentMetaData { pub fn new(rows: usize, schema: Schema) -> Self { - let mut meta = Self { + Self { size: 0, rows, schema, column_names: vec![], time_range: (0, 0), - row_ids: croaring::Bitmap::create_with_capacity(rows as u32), - }; - meta.row_ids.add_range(0..rows as u64); - meta + // row_ids: croaring::Bitmap::create_with_capacity(rows as u32), + } + // meta.row_ids.add_range(0..rows as u64); } pub fn schema(&self) -> SchemaRef { From e5f9c7c57433a3a3d47175001757a70d3702118a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 10 Sep 2020 21:07:28 +0100 Subject: [PATCH 51/73] refactor: add encoding trait --- Cargo.lock | 40 +++++++-- delorean_mem_qe/src/bin/main.rs | 8 +- delorean_mem_qe/src/column.rs | 22 ++--- delorean_mem_qe/src/encoding.rs | 151 ++++++++++++++++++++++++++++++-- delorean_mem_qe/src/segment.rs | 4 +- 5 files changed, 190 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed5f18fdfb..6c92c9ccb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,7 +87,11 @@ checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" +<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" +======= +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" +>>>>>>> 27b73c4... 
refactor: add encoding trait dependencies = [ "chrono", "csv", @@ -107,7 +111,7 @@ dependencies = [ [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" dependencies = [ "chrono", "csv", @@ -665,7 +669,27 @@ dependencies = [ [[package]] name = "datafusion" version = "2.0.0-SNAPSHOT" +<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" +======= +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "clap", + "crossbeam", + "fnv", + "num_cpus", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "paste", + "rustyline", + "sqlparser", +] + +[[package]] +name = "datafusion" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" +>>>>>>> 27b73c4... refactor: add encoding trait dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "chrono", @@ -689,7 +713,11 @@ dependencies = [ "clap", "criterion", "csv", +<<<<<<< HEAD "delorean_arrow", +======= + "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", +>>>>>>> 27b73c4... 
refactor: add encoding trait "delorean_generated_types", "delorean_ingest", "delorean_line_parser", @@ -784,16 +812,16 @@ dependencies = [ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "chrono", "croaring", "crossbeam", - "datafusion", + "datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "delorean_table", "env_logger", "human_format", "log", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "snafu", ] @@ -2073,9 +2101,9 @@ dependencies = [ [[package]] name = "parquet" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", "brotli", "byteorder", "chrono", diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d4b810467d..b172338da8 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - // if i < 364 { - // i += 1; - // continue; - // } + if i < 364 { + i += 1; + continue; + } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 45070387c7..fcef27c1b8 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -595,7 +595,6 @@ impl Column { Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), } } -} pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { match self { @@ -965,12 +964,13 @@ impl String { } } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Float { meta: metadata::F64, // TODO(edd): compression of float columns - data: encoding::PlainFixed, + // data: encoding::PlainFixed, + data: Box>, } impl Float { @@ -1029,17 +1029,17 @@ impl From<&[f64]> for Float { Self { meta: metadata::F64::new((min, max), len), - data: encoding::PlainFixed::from(values), + data: Box::new(encoding::PlainFixed::from(values)), } } } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Integer { meta: metadata::I64, // TODO(edd): compression of integers - data: encoding::PlainFixed, + data: Box>, } impl Integer { @@ -1078,14 +1078,6 @@ impl Integer { } self.data.row_id_eq_value(v) } - - /// Find the first logical row that contains a value >= v - pub fn row_id_ge_value(&self, v: i64) -> Option { - if self.meta.max() < v { - return None; - } - self.data.row_id_ge_value(v) - } } impl From<&[i64]> for Integer { @@ -1102,7 +1094,7 @@ impl From<&[i64]> for Integer { Self { meta: metadata::I64::new((min, max), len), - data: encoding::PlainFixed::from(values), + data: 
Box::new(encoding::PlainFixed::from(values)), } } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 1bed37f027..9c04c2d609 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,16 +1,39 @@ use std::collections::{BTreeMap, BTreeSet}; use std::iter; -// TODO(edd): this is just for convenience. In reality one would store nulls -// separately and not use `Option`. -#[derive(Debug, Default)] -pub struct PlainFixedOption { - values: Vec>, +use arrow::array::Array; + +pub trait NumericEncoding: Send + Sync { + type Item; + + fn size(&self) -> usize; + fn value(&self, row_id: usize) -> Self::Item; + fn values(&self, row_ids: &[usize]) -> Vec; + fn encoded_values(&self, row_ids: &[usize]) -> Vec; + fn all_encoded_values(&self) -> Vec; + fn scan_from(&self, row_id: usize) -> &[Self::Item]; + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item; + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item; + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + + fn row_id_eq_value(&self, v: Self::Item) -> Option; + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap; + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; } -impl PlainFixedOption { - pub fn size(&self) -> usize { - self.values.len() * std::mem::size_of::>() +impl std::fmt::Debug for dyn NumericEncoding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", "todo") + } +} + +impl std::fmt::Debug for dyn NumericEncoding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", "todo") } } @@ -253,6 +276,118 @@ impl From<&[f64]> for PlainFixed { } } +impl NumericEncoding for PlainFixed { + type Item = f64; + + fn size(&self) -> usize { + self.size() + } + + fn value(&self, row_id: usize) -> Self::Item { + self.value(row_id) + } + + fn values(&self, row_ids: &[usize]) -> Vec { + self.values(row_ids) + } + + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + self.encoded_values(row_ids) + } + + fn all_encoded_values(&self) -> Vec { + self.all_encoded_values() + } + + fn scan_from(&self, row_id: usize) -> &[Self::Item] { + self.scan_from(row_id) + } + + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { + self.sum_by_ids(row_ids) + } + + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { + self.sum_by_id_range(from_row_id, to_row_id) + } + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.count_by_id_range(from_row_id, to_row_id) + } + + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { + self.row_ids_single_cmp_roaring(wanted, order) + } + + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { + self.row_ids_gte_lt_roaring(from, to) + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + self.row_id_eq_value(v) + } +} + +impl NumericEncoding for PlainFixed { + type Item = i64; + + fn size(&self) -> usize { + self.size() + } + + fn value(&self, row_id: usize) -> Self::Item { + self.value(row_id) + } + + fn values(&self, row_ids: &[usize]) -> Vec { + self.values(row_ids) + } + + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + self.encoded_values(row_ids) + } + + fn all_encoded_values(&self) -> Vec { + self.all_encoded_values() + } + + fn 
scan_from(&self, row_id: usize) -> &[Self::Item] { + self.scan_from(row_id) + } + + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { + self.sum_by_ids(row_ids) + } + + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { + self.sum_by_id_range(from_row_id, to_row_id) + } + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.count_by_id_range(from_row_id, to_row_id) + } + + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { + self.row_ids_single_cmp_roaring(wanted, order) + } + + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { + self.row_ids_gte_lt_roaring(from, to) + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + self.row_id_eq_value(v) + } +} + #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 47e673b28f..9a15b03430 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -423,8 +423,8 @@ impl Segment { } // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - // BTreeMap::new() - hash_table + BTreeMap::new() + // hash_table } pub fn aggregate_by_group_using_sort( From 9f299461ed428f3ec612e80de7776791a9dd42ba Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Sep 2020 11:11:38 +0100 Subject: [PATCH 52/73] feat: working on arrow backing --- delorean_mem_qe/src/encoding.rs | 214 +++++++++++++++++++++++++++++++- 1 file changed, 213 insertions(+), 1 deletion(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 9c04c2d609..e19d533306 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,7 +1,38 @@ use std::collections::{BTreeMap, BTreeSet}; use std::iter; -use arrow::array::Array; +use arrow::array::{Array, PrimitiveArray}; +use arrow::datatypes::ArrowNumericType; +use arrow::datatypes::*; + +pub trait Encoding: Send + Sync { + type Item; + + fn size(&self) -> usize; + fn value(&self, row_id: usize) -> Self::Item; + fn values(&self, row_ids: &[usize]) -> Vec; + fn encoded_values(&self, row_ids: &[usize]) -> Vec; + fn all_encoded_values(&self) -> Vec; + fn scan_from(&self, row_id: usize) -> &[Self::Item]; + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + + // TODO(edd): clean up the API for getting row ids that match predicates. + // + // Ideally you should be able to provide a collection of predicates to + // match on. + // + // A simpler approach would be to provide a method that matches on a single + // predicate and then call that multiple times, unioning or intersecting the + // resulting row sets. 
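// For example, `x > 10 AND x < 20` could be answered against any
// implementation by intersecting two single-comparison results (hypothetical
// composition, using the methods declared below and croaring's `and`):
//
//     let gt = enc.row_ids_single_cmp_roaring(&10, std::cmp::Ordering::Greater);
//     let lt = enc.row_ids_single_cmp_roaring(&20, std::cmp::Ordering::Less);
//     let matching = gt.and(&lt); // `or` would give the union for OR predicates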
+ fn row_id_eq_value(&self, v: Self::Item) -> Option; + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap; + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; +} pub trait NumericEncoding: Send + Sync { type Item; @@ -37,6 +68,173 @@ impl std::fmt::Debug for dyn NumericEncoding { } } +pub struct PlainArrow +where + // T: ArrowNumericType + std::ops::Add, + T: ArrowNumericType, + // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, +{ + arr: PrimitiveArray, + // _phantom: T, +} + +impl PlainArrow +where + // T: ArrowNumericType + std::ops::Add, + T: ArrowNumericType, + // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, +{ + pub fn size(&self) -> usize { + self.arr.len() + } + + pub fn value(&self, row_id: usize) -> Option { + if self.arr.is_null(row_id) { + return None; + } + Some(self.arr.value(row_id)) + } + + fn values(&self, row_ids: &[usize]) -> Vec> { + let mut out = Vec::with_capacity(row_ids.len()); + for &row_id in row_ids { + if self.arr.is_null(row_id) { + out.push(None) + } else { + out.push(Some(self.arr.value(row_id))) + } + } + assert_eq!(out.len(), row_ids.len()); + out + } + + /// Well this is terribly slow + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { + self.values(row_ids) + } + + /// TODO(edd): there must be a more efficient way. + pub fn all_encoded_values(&self) -> Vec> { + let mut out = Vec::with_capacity(self.arr.len()); + for i in 0..self.arr.len() { + if self.arr.is_null(i) { + out.push(None) + } else { + out.push(Some(self.arr.value(i))) + } + } + assert_eq!(out.len(), self.arr.len()); + out + } + + pub fn scan_from(&self, row_id: usize) -> &[Option] { + // todo + + &[] + + // let mut out = Vec::with_capacity(self.arr.len() - row_id); + // for i in row_id..self.arr.len() { + // if self.arr.is_null(i) { + // out.push(None) + // } else { + // out.push(Some(self.arr.value(i))) + // } + // } + // assert_eq!(out.len(), self.arr.len()); + // out.as_slice() + } + + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + // let mut res = T::Native::default(); + + // // HMMMMM - materialising which has a memory cost. + // let vec = row_ids.to_vec(); + // for v in vec { + // res += self.arr.value(v as usize); + // } + None // todo + } + + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { + // if the column contains a null value between the range then the result + // will be None. + for i in from_row_id..to_row_id { + if self.arr.is_null(i) { + return None; + } + } + + // Otherwise sum all the values between in the range. + // let mut res = f64::from(self.arr.value(from_row_id)); + // for i in from_row_id + 1..to_row_id { + // res = res + self.arr.value(i); + // } + // Some(res) + None + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + // TODO - count values that are not null in the row range. 
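// In the Arrow representation the information needed for that TODO is already
// tracked in the array's validity (null) bitmap, so a null-aware count can be
// written as below; the rewrite later in this series fills the method in the
// same way (sketch, using arrow's `Array::is_null` as elsewhere in this file):
//
//     let mut count = 0;
//     for i in from_row_id..to_row_id {
//         if !self.arr.is_null(i) {
//             count += 1;
//         }
//     }
//     count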
+ 0 // todo + } +} + +impl NumericEncoding for PlainArrow { + type Item = Option; + + fn size(&self) -> usize { + self.size() + } + + fn value(&self, row_id: usize) -> Self::Item { + self.value(row_id) + } + + fn values(&self, row_ids: &[usize]) -> Vec { + self.values(row_ids) + } + + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + self.encoded_values(row_ids) + } + + fn all_encoded_values(&self) -> Vec { + self.all_encoded_values() + } + + fn scan_from(&self, row_id: usize) -> &[Self::Item] { + self.scan_from(row_id) + } + + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { + self.sum_by_ids(row_ids) + } + + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { + self.sum_by_id_range(from_row_id, to_row_id) + } + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.count_by_id_range(from_row_id, to_row_id) + } + + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { + self.row_ids_single_cmp_roaring(wanted, order) + } + + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { + self.row_ids_gte_lt_roaring(from, to) + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + self.row_id_eq_value(v) + } +} + #[derive(Debug, Default)] // No compression pub struct PlainFixed { @@ -761,6 +959,20 @@ impl std::convert::From<&delorean_table::Packer> for #[cfg(test)] mod test { + + #[test] + fn plain_arrow() { + let col = super::PlainArrow { + arr: super::PrimitiveArray::from(vec![Some(2.3), Some(44.56), None]), + }; + + let encoded = col.all_encoded_values(); + assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); + + let sum = col.sum_by_id_range(0, 1); + assert_eq!(sum, Some(46.86)); + } + #[test] fn plain_row_ids_roaring_eq() { let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; From 47b2f7940b4817589ec09bbcd88bfe0245afeeb5 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Sep 2020 13:39:02 +0100 Subject: [PATCH 53/73] refactor: spike on arrow encoding --- Cargo.lock | 10 + delorean_mem_qe/Cargo.toml | 4 +- delorean_mem_qe/src/bin/main.rs | 1 + delorean_mem_qe/src/column.rs | 96 +++++++- delorean_mem_qe/src/encoding.rs | 419 +++++++++++++------------------- delorean_mem_qe/src/segment.rs | 15 ++ 6 files changed, 281 insertions(+), 264 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c92c9ccb3..63e340c752 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -819,6 +819,7 @@ dependencies = [ "datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "delorean_table", "env_logger", + "heapsize", "human_format", "log", "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", @@ -1356,6 +1357,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "heapsize" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1679e6ea370dee694f91f1dc469bf94cf8f52051d147aec3e1f9497c6fc22461" +dependencies = [ + "winapi 0.3.8", +] + [[package]] name = "heck" version = "0.3.1" diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index c2086ac66f..742df5ba19 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -12,9 +12,7 @@ delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } parquet = { git = 
"https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } -#arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } -#parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } -#datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } +heapsize = "0.4.2" snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index b172338da8..90acdd167f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -134,6 +134,7 @@ fn build_store( let mut segment = Segment::new(rb.num_rows(), schema); convert_record_batch(rb, &mut segment)?; + println!("{}", &segment); store.add_segment(segment); } Ok(None) => { diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index fcef27c1b8..e0b5df55dc 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -873,6 +873,23 @@ impl Column { } } +impl std::fmt::Display for Column { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + Column::String(c) => { + write!(f, "{}", c)?; + } + Column::Float(c) => { + write!(f, "{}", c)?; + } + Column::Integer(c) => { + write!(f, "{}", c)?; + } + } + Ok(()) + } +} + impl AggregatableByRange for &Column { fn aggregate_by_id_range( &self, @@ -964,6 +981,12 @@ impl String { } } +impl std::fmt::Display for String { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) + } +} + #[derive(Debug)] pub struct Float { meta: metadata::F64, @@ -1015,6 +1038,12 @@ impl Float { } } +impl std::fmt::Display for Float { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) + } +} + impl From<&[f64]> for Float { fn from(values: &[f64]) -> Self { let len = values.len(); @@ -1034,6 +1063,32 @@ impl From<&[f64]> for Float { } } +// use arrow::array::Array; +// impl From> for Float { +// fn from(arr: arrow::array::PrimitiveArray) -> Self { +// let len = arr.len(); +// let mut min = std::f64::MAX; +// let mut max = std::f64::MIN; + +// // calculate min/max for meta data +// // TODO(edd): can use compute kernels for this. +// for i in 0..arr.len() { +// if arr.is_null(i) { +// continue; +// } + +// let v = arr.value(i); +// min = min.min(v); +// max = max.max(v); +// } + +// Self { +// meta: metadata::F64::new((min, max), len), +// data: Box::new(encoding::PlainArrow { arr }), +// } +// } +// } + #[derive(Debug)] pub struct Integer { meta: metadata::I64, @@ -1080,6 +1135,12 @@ impl Integer { } } +impl std::fmt::Display for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) + } +} + impl From<&[i64]> for Integer { fn from(values: &[i64]) -> Self { let len = values.len(); @@ -1100,11 +1161,12 @@ impl From<&[i64]> for Integer { } pub mod metadata { + use std::mem::size_of; + #[derive(Debug, Default)] pub struct Str { range: (Option, Option), num_rows: usize, - // sparse_index: BTreeMap, } impl Str { @@ -1145,8 +1207,20 @@ pub mod metadata { } pub fn size(&self) -> usize { - // TODO!!!! 
- 0 //self.range.0.len() + self.range.1.len() + std::mem::size_of::() + // size of types for num_rows and range + let base_size = size_of::() + (2 * size_of::>()); + match &self.range { + (None, None) => base_size, + (Some(min), None) => base_size + min.len(), + (None, Some(max)) => base_size + max.len(), + (Some(min), Some(max)) => base_size + min.len() + max.len(), + } + } + } + + impl std::fmt::Display for Str { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Range: ({:?})", self.range) } } @@ -1184,7 +1258,13 @@ pub mod metadata { } pub fn size(&self) -> usize { - std::mem::size_of::<(f64, f64)>() + std::mem::size_of::() + size_of::() + (size_of::<(f64, f64)>()) + } + } + + impl std::fmt::Display for F64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Range: ({:?})", self.range) } } @@ -1219,7 +1299,13 @@ pub mod metadata { } pub fn size(&self) -> usize { - std::mem::size_of::<(i64, i64)>() + std::mem::size_of::() + size_of::() + (size_of::<(i64, i64)>()) + } + } + + impl std::fmt::Display for I64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Range: ({:?})", self.range) } } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e19d533306..54b90b0a72 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,11 +1,11 @@ use std::collections::{BTreeMap, BTreeSet}; use std::iter; +use std::mem::size_of; use arrow::array::{Array, PrimitiveArray}; use arrow::datatypes::ArrowNumericType; -use arrow::datatypes::*; -pub trait Encoding: Send + Sync { +pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { type Item; fn size(&self) -> usize; @@ -15,39 +15,15 @@ pub trait Encoding: Send + Sync { fn all_encoded_values(&self) -> Vec; fn scan_from(&self, row_id: usize) -> &[Self::Item]; - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; - - // TODO(edd): clean up the API for getting row ids that match predicates. - // - // Ideally you should be able to provide a collection of predicates to - // match on. - // - // A simpler approach would be to provide a method that matches on a single - // predicate and then call that multiple times, unioning or intersecting the - // resulting row sets. 
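// The `Item` associated type survives this consolidation onto
// `NumericEncoding`: callers stay generic over the physical encoding while
// the logical value type is pinned down. For example (hypothetical helper):
//
//     fn first_match<E: NumericEncoding<Item = i64>>(enc: &E, v: i64) -> Option<usize> {
//         enc.row_id_eq_value(v)
//     }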
- fn row_id_eq_value(&self, v: Self::Item) -> Option; - fn row_ids_single_cmp_roaring( - &self, - wanted: &Self::Item, - order: std::cmp::Ordering, - ) -> croaring::Bitmap; - fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; -} - -pub trait NumericEncoding: Send + Sync { - type Item; - - fn size(&self) -> usize; - fn value(&self, row_id: usize) -> Self::Item; - fn values(&self, row_ids: &[usize]) -> Vec; - fn encoded_values(&self, row_ids: &[usize]) -> Vec; - fn all_encoded_values(&self) -> Vec; - fn scan_from(&self, row_id: usize) -> &[Self::Item]; fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item; fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item; + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64; fn row_id_eq_value(&self, v: Self::Item) -> Option; + fn row_id_ge_value(&self, v: Self::Item) -> Option; + fn row_ids_single_cmp_roaring( &self, wanted: &Self::Item, @@ -56,39 +32,37 @@ pub trait NumericEncoding: Send + Sync { fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; } -impl std::fmt::Debug for dyn NumericEncoding { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", "todo") - } -} - -impl std::fmt::Debug for dyn NumericEncoding { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", "todo") - } -} - +#[derive(Debug)] pub struct PlainArrow where - // T: ArrowNumericType + std::ops::Add, T: ArrowNumericType, - // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, { arr: PrimitiveArray, - // _phantom: T, } -impl PlainArrow +impl NumericEncoding for PlainArrow where - // T: ArrowNumericType + std::ops::Add, - T: ArrowNumericType, - // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, + T: ArrowNumericType + std::fmt::Debug, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, { - pub fn size(&self) -> usize { + type Item = Option; + + fn size(&self) -> usize { self.arr.len() } - pub fn value(&self, row_id: usize) -> Option { + fn value(&self, row_id: usize) -> Option { if self.arr.is_null(row_id) { return None; } @@ -109,12 +83,12 @@ where } /// Well this is terribly slow - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { + fn encoded_values(&self, row_ids: &[usize]) -> Vec> { self.values(row_ids) } /// TODO(edd): there must be a more efficient way. - pub fn all_encoded_values(&self) -> Vec> { + fn all_encoded_values(&self) -> Vec> { let mut out = Vec::with_capacity(self.arr.len()); for i in 0..self.arr.len() { if self.arr.is_null(i) { @@ -127,11 +101,10 @@ where out } - pub fn scan_from(&self, row_id: usize) -> &[Option] { - // todo - - &[] - + // TODO(edd): problem here is returning a slice because we need to own the + // backing vector. + fn scan_from(&self, row_id: usize) -> &[Option] { + unimplemented!("need to figure out returning a slice"); // let mut out = Vec::with_capacity(self.arr.len() - row_id); // for i in row_id..self.arr.len() { // if self.arr.is_null(i) { @@ -144,18 +117,22 @@ where // out.as_slice() } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { - // let mut res = T::Native::default(); - - // // HMMMMM - materialising which has a memory cost. 
- // let vec = row_ids.to_vec(); - // for v in vec { - // res += self.arr.value(v as usize); - // } - None // todo + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + // TODO(edd): this is expensive - may pay to expose method to do this + // where you accept an array. + let mut res = T::Native::default(); + let vec = row_ids.to_vec(); + for row_id in vec { + let i = row_id as usize; + if self.arr.is_null(i) { + return None; + } + res = res + self.arr.value(i); + } + Some(res) } - pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { // if the column contains a null value between the range then the result // will be None. for i in from_row_id..to_row_id { @@ -165,57 +142,35 @@ where } // Otherwise sum all the values between in the range. - // let mut res = f64::from(self.arr.value(from_row_id)); - // for i in from_row_id + 1..to_row_id { - // res = res + self.arr.value(i); - // } - // Some(res) - None - } - - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - // TODO - count values that are not null in the row range. - 0 // todo - } -} - -impl NumericEncoding for PlainArrow { - type Item = Option; - - fn size(&self) -> usize { - self.size() - } - - fn value(&self, row_id: usize) -> Self::Item { - self.value(row_id) - } - - fn values(&self, row_ids: &[usize]) -> Vec { - self.values(row_ids) - } - - fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.encoded_values(row_ids) - } - - fn all_encoded_values(&self) -> Vec { - self.all_encoded_values() - } - - fn scan_from(&self, row_id: usize) -> &[Self::Item] { - self.scan_from(row_id) - } - - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { - self.sum_by_ids(row_ids) - } - - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { - self.sum_by_id_range(from_row_id, to_row_id) + let mut res = T::Native::default(); + for i in from_row_id..to_row_id { + res = res + self.arr.value(i); + } + Some(res) } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.count_by_id_range(from_row_id, to_row_id) + // TODO - count values that are not null in the row range. + let mut count = 0; + for i in from_row_id..to_row_id { + if self.arr.is_null(i) { + continue; + } + count += 1; + } + count + } + + fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + todo!() + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + todo!() + } + + fn row_id_ge_value(&self, v: Self::Item) -> Option { + todo!() } fn row_ids_single_cmp_roaring( @@ -223,15 +178,26 @@ impl NumericEncoding for PlainArrow { wanted: &Self::Item, order: std::cmp::Ordering, ) -> croaring::Bitmap { - self.row_ids_single_cmp_roaring(wanted, order) + todo!() } fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { - self.row_ids_gte_lt_roaring(from, to) + todo!() } +} - fn row_id_eq_value(&self, v: Self::Item) -> Option { - self.row_id_eq_value(v) +impl std::fmt::Display for PlainArrow +where + T: ArrowNumericType + std::fmt::Debug, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[PlainArrow] size: {}", self.size()) } } @@ -242,31 +208,60 @@ pub struct PlainFixed { // total_order can be used as a hint to stop scanning the column early when // applying a comparison predicate to the column. 
total_order: bool, + + size: usize, } -impl PlainFixed +impl std::fmt::Display for PlainFixed where - T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, + T: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::fmt::Display + + Sync + + Send + + std::ops::AddAssign, { - pub fn size(&self) -> usize { - self.values.len() * std::mem::size_of::() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[PlainFixed] size: {}", self.size(),) + } +} + +impl NumericEncoding for PlainFixed +where + T: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::fmt::Display + + Sync + + Send + + std::ops::AddAssign, +{ + type Item = T; + + fn size(&self) -> usize { + self.size } - pub fn row_id_eq_value(&self, v: T) -> Option { + fn row_id_eq_value(&self, v: T) -> Option { self.values.iter().position(|x| *x == v) } - pub fn row_id_ge_value(&self, v: T) -> Option { + fn row_id_ge_value(&self, v: T) -> Option { self.values.iter().position(|x| *x >= v) } // get value at row_id. Panics if out of bounds. - pub fn value(&self, row_id: usize) -> T { + fn value(&self, row_id: usize) -> T { self.values[row_id] } /// Return the decoded values for the provided logical row ids. - pub fn values(&self, row_ids: &[usize]) -> Vec { + fn values(&self, row_ids: &[usize]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); for chunks in row_ids.chunks_exact(4) { out.push(self.values[chunks[3]]); @@ -286,24 +281,24 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.values(row_ids) } /// Return all encoded values. For this encoding this is just the decoded /// values - pub fn all_encoded_values(&self) -> Vec { + fn all_encoded_values(&self) -> Vec { self.values.clone() } - pub fn scan_from(&self, row_id: usize) -> &[T] { + fn scan_from(&self, row_id: usize) -> &[T] { &self.values[row_id..] } /// returns a set of row ids that match a single ordering on a desired value /// /// This supports `value = x` , `value < x` or `value > x`. - pub fn row_ids_single_cmp_roaring( + fn row_ids_single_cmp_roaring( &self, wanted: &T, order: std::cmp::Ordering, @@ -343,7 +338,7 @@ where /// returns a set of row ids that match the half open interval `[from, to)`. /// /// The main use-case for this is time range filtering. 
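// A `time >= from AND time < to` predicate maps directly onto this half-open
// interval. Hypothetical usage, with nanosecond timestamps:
//
//     let row_ids = time_col.row_ids_gte_lt_roaring(&1_596_100_000_000_000_000,
//                                                   &1_596_200_000_000_000_000);
//     // row_ids now holds every row whose time t satisfies from <= t < to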
- pub fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap { + fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); let mut found = false; //self.values[0]; @@ -376,7 +371,7 @@ where bm } - pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { let mut res = T::default(); for v in self.values[from_row_id..to_row_id].iter() { res += *v; @@ -384,12 +379,12 @@ where res } - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { to_row_id - from_row_id } // TODO(edd): make faster - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { let mut res = T::default(); // println!( // "cardinality is {:?} out of {:?}", @@ -449,7 +444,7 @@ where res } - pub fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { row_ids.cardinality() } } @@ -460,6 +455,10 @@ impl From<&[i64]> for PlainFixed { values: v.to_vec(), // buf: Vec::with_capacity(v.len()), total_order: false, + size: size_of::>() + + (size_of::() * v.len()) + + size_of::() + + size_of::(), } } } @@ -470,122 +469,14 @@ impl From<&[f64]> for PlainFixed { values: v.to_vec(), // buf: Vec::with_capacity(v.len()), total_order: false, + size: size_of::>() + + (size_of::() * v.len()) + + size_of::() + + size_of::(), } } } -impl NumericEncoding for PlainFixed { - type Item = f64; - - fn size(&self) -> usize { - self.size() - } - - fn value(&self, row_id: usize) -> Self::Item { - self.value(row_id) - } - - fn values(&self, row_ids: &[usize]) -> Vec { - self.values(row_ids) - } - - fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.encoded_values(row_ids) - } - - fn all_encoded_values(&self) -> Vec { - self.all_encoded_values() - } - - fn scan_from(&self, row_id: usize) -> &[Self::Item] { - self.scan_from(row_id) - } - - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { - self.sum_by_ids(row_ids) - } - - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { - self.sum_by_id_range(from_row_id, to_row_id) - } - - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.count_by_id_range(from_row_id, to_row_id) - } - - fn row_ids_single_cmp_roaring( - &self, - wanted: &Self::Item, - order: std::cmp::Ordering, - ) -> croaring::Bitmap { - self.row_ids_single_cmp_roaring(wanted, order) - } - - fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { - self.row_ids_gte_lt_roaring(from, to) - } - - fn row_id_eq_value(&self, v: Self::Item) -> Option { - self.row_id_eq_value(v) - } -} - -impl NumericEncoding for PlainFixed { - type Item = i64; - - fn size(&self) -> usize { - self.size() - } - - fn value(&self, row_id: usize) -> Self::Item { - self.value(row_id) - } - - fn values(&self, row_ids: &[usize]) -> Vec { - self.values(row_ids) - } - - fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.encoded_values(row_ids) - } - - fn all_encoded_values(&self) -> Vec { - self.all_encoded_values() - } - - fn scan_from(&self, row_id: usize) -> &[Self::Item] { - self.scan_from(row_id) - } - - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { - self.sum_by_ids(row_ids) - } - - fn sum_by_id_range(&self, from_row_id: usize, 
to_row_id: usize) -> Self::Item { - self.sum_by_id_range(from_row_id, to_row_id) - } - - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.count_by_id_range(from_row_id, to_row_id) - } - - fn row_ids_single_cmp_roaring( - &self, - wanted: &Self::Item, - order: std::cmp::Ordering, - ) -> croaring::Bitmap { - self.row_ids_single_cmp_roaring(wanted, order) - } - - fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { - self.row_ids_gte_lt_roaring(from, to) - } - - fn row_id_eq_value(&self, v: Self::Item) -> Option { - self.row_id_eq_value(v) - } -} - #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. @@ -602,7 +493,6 @@ pub struct DictionaryRLE { // stores tuples where each pair refers to a dictionary entry and the number // of times the entry repeats. run_lengths: Vec<(usize, u64)>, - run_length_size: usize, total: u64, } @@ -615,7 +505,6 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), - run_length_size: 0, total: 0, } } @@ -627,7 +516,6 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), - run_length_size: 0, total: 0, }; @@ -663,7 +551,6 @@ impl DictionaryRLE { } else { // start a new run-length self.run_lengths.push((*idx, additional)); - self.run_length_size += std::mem::size_of::<(usize, u64)>(); } self.index_row_ids .get_mut(&(*idx as u32)) @@ -690,7 +577,6 @@ impl DictionaryRLE { .get_mut(&(idx as u32)) .unwrap() .add_range(self.total..self.total + additional); - self.run_length_size += std::mem::size_of::<(usize, u64)>(); } } } @@ -927,8 +813,28 @@ impl DictionaryRLE { } pub fn size(&self) -> usize { - // mapping and reverse mapping then the rles - 2 * self.map_size + self.run_length_size + // entry_index: BTreeMap, usize>, + + // // stores the mapping between an index and its entry. + // index_entry: BTreeMap>, + + (self.index_entry.len() * size_of::>>()) + + (self.index_row_ids.len() * size_of::>()) + + size_of::() + + (self.run_lengths.len() * size_of::>()) + + size_of::() + } +} + +impl std::fmt::Display for DictionaryRLE { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[DictionaryRLE] size: {}, dict entries: {}, runs: {} ", + self.size(), + self.index_entry.len(), + self.run_lengths.len() + ) } } @@ -959,6 +865,7 @@ impl std::convert::From<&delorean_table::Packer> for #[cfg(test)] mod test { + use super::NumericEncoding; #[test] fn plain_arrow() { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 9a15b03430..f4315c8234 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -934,6 +934,21 @@ impl Segment { } } +impl std::fmt::Display for Segment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!( + f, + "Rows: {}\nSize: {} Columns: ", + self.num_rows(), + self.size() + )?; + for (c, name) in self.columns.iter().zip(self.column_names().iter()) { + writeln!(f, "{} {}", name, c)?; + } + Ok(()) + } +} + /// Meta data for a segment. This data is mainly used to determine if a segment /// may contain a value that can answer a query. 
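/// For example, a query with the predicate `time >= 100 AND time < 200` can
/// skip a segment entirely when the segment's time range metadata lies
/// wholly outside `[100, 200)`, without touching any of its column data.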
#[derive(Debug)] From 48623d6f77dd8ae6e3f40698af291b6a9d11f0e0 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Sep 2020 17:38:38 +0100 Subject: [PATCH 54/73] refactor: enable broken code --- delorean_mem_qe/src/segment.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f4315c8234..9255817eaa 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -424,6 +424,7 @@ impl Segment { // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() + // hash_table } From d3e819b3bd61f20516dbbdebb6a7ed73a0a7c17a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 14 Sep 2020 11:22:13 +0100 Subject: [PATCH 55/73] refactor: get column compiling --- delorean_mem_qe/src/column.rs | 923 +++++++++++++++++++------------- delorean_mem_qe/src/encoding.rs | 141 ++--- 2 files changed, 614 insertions(+), 450 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index e0b5df55dc..4faffdc4da 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -100,7 +100,7 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { #[derive(Clone, Debug)] pub enum Aggregate<'a> { Count(u64), - Sum(Scalar<'a>), + Sum(Option>), } #[derive(Debug, Clone)] @@ -109,29 +109,16 @@ pub enum AggregateType { Sum, } -impl<'a> Aggregate<'a> { - pub fn update_with(&mut self, other: Scalar<'a>) { - match self { - Self::Count(v) => { - *v = *v + 1; - } - Self::Sum(v) => { - v.add(other); - } - } - } -} +// impl<'a> std::ops::Add> for Aggregate<'a> { +// type Output = Aggregate<'a>; -impl<'a> std::ops::Add> for Aggregate<'a> { - type Output = Aggregate<'a>; - - fn add(self, _rhs: Scalar<'a>) -> Self::Output { - match self { - Self::Count(c) => Self::Count(c + 1), - Self::Sum(s) => Self::Sum(s + &_rhs), - } - } -} +// fn add(self, _rhs: Scalar<'a>) -> Self::Output { +// match self { +// Self::Count(c) => Self::Count(c + 1), +// Self::Sum(s) => Self::Sum(s + &_rhs), +// } +// } +// } impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { type Output = Aggregate<'a>; @@ -147,7 +134,12 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { } Self::Sum(s) => { if let Self::Sum(other) = _rhs { - Self::Sum(s + other) + match (s, other) { + (None, None) => Self::Sum(None), + (None, Some(other)) => Self::Sum(Some(*other)), + (Some(s), None) => Self::Sum(Some(s)), + (Some(s), Some(other)) => Self::Sum(Some(s + other)), + } } else { panic!("invalid"); } @@ -167,48 +159,49 @@ pub trait AggregatableByRange { /// A Vector is a materialised vector of values from a column. 
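/// For example, materialising rows 0 and 2 of a nullable float column
/// holding [Some(1.0), None, Some(3.0)] produces
/// `Vector::Float(vec![Some(1.0), Some(3.0)])`.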
pub enum Vector<'a> { String(Vec<&'a Option>), - Float(Vec), - Integer(Vec), + EncodedString(Vec), + Float(Vec>), + Integer(Vec>), } impl<'a> Vector<'a> { - pub fn aggregate_by_id_range( - &self, - agg_type: &AggregateType, - from_row_id: usize, - to_row_id: usize, - ) -> Aggregate<'a> { - match agg_type { - AggregateType::Count => { - Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) - } - AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), - } - } + // pub fn aggregate_by_id_range( + // &self, + // agg_type: &AggregateType, + // from_row_id: usize, + // to_row_id: usize, + // ) -> Aggregate<'a> { + // match agg_type { + // AggregateType::Count => { + // Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + // } + // AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), + // } + // } - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { - match self { - Vector::String(_) => { - panic!("can't sum strings...."); - } - Vector::Float(values) => { - let mut res = 0.0; - // TODO(edd): check asm to see if it's vectorising - for v in values[from_row_id..to_row_id].iter() { - res += *v; - } - Scalar::Float(res) - } - Vector::Integer(values) => { - let mut res = 0; - // TODO(edd): check asm to see if it's vectorising - for v in values[from_row_id..to_row_id].iter() { - res += *v; - } - Scalar::Integer(res) - } - } - } + // fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { + // match self { + // Vector::String(_) => { + // panic!("can't sum strings...."); + // } + // Vector::Float(values) => { + // let mut res = 0.0; + // // TODO(edd): check asm to see if it's vectorising + // for v in values[from_row_id..to_row_id].iter() { + // res += *v; + // } + // Scalar::Float(res) + // } + // Vector::Integer(values) => { + // let mut res = 0; + // // TODO(edd): check asm to see if it's vectorising + // for v in values[from_row_id..to_row_id].iter() { + // res += *v; + // } + // Scalar::Integer(res) + // } + // } + // } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { to_row_id - from_row_id @@ -335,8 +328,8 @@ impl<'a> std::fmt::Display for Vector<'a> { #[derive(Debug)] pub enum Column { String(String), - Float(Float), - Integer(Integer), + Float(NumericColumn), + Integer(NumericColumn), } impl Column { @@ -376,13 +369,23 @@ impl Column { if row_id >= self.num_rows() { return None; } - Some(Scalar::Float(c.value(row_id))) + + let v = c.value(row_id); + if let Some(v) = v { + return Some(Scalar::Float(v)); + } + None } Column::Integer(c) => { if row_id >= self.num_rows() { return None; } - Some(Scalar::Integer(c.value(row_id))) + + let v = c.value(row_id); + if let Some(v) = v { + return Some(Scalar::Integer(v)); + } + None } } } @@ -485,7 +488,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids_vec); log::debug!("time getting encoded values {:?}", now.elapsed()); - Vector::Integer(v) + Vector::EncodedString(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -518,7 +521,7 @@ impl Column { log::debug!("time getting encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::Integer(v) + Vector::EncodedString(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -546,7 +549,7 @@ impl Column { log::debug!("time getting all encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::Integer(v) + 
Vector::EncodedString(v) } Column::Float(c) => Vector::Float(c.all_encoded_values()), Column::Integer(c) => Vector::Integer(c.all_encoded_values()), @@ -596,28 +599,25 @@ impl Column { } } - pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { + pub fn maybe_contains(&self, value: &Scalar<'_>) -> bool { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - c.meta.maybe_contains_value(Some(v.to_string())) - } else { - panic!("invalid value"); - } + Column::String(c) => { + if let Scalar::String(v) = value { + c.meta.maybe_contains_value(v.to_string()) + } else { + panic!("invalid value"); } - None => c.meta.maybe_contains_value(None), - }, + } Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { - c.meta.maybe_contains_value(v.to_owned()) + if let Scalar::Float(v) = value { + c.meta.maybe_contains_value(*v) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { - c.meta.maybe_contains_value(v.to_owned()) + if let Scalar::Integer(v) = value { + c.meta.maybe_contains_value(*v) } else { panic!("invalid value or unsupported null"); } @@ -626,76 +626,98 @@ impl Column { } /// returns true if the column cannot contain - pub fn max_less_than(&self, value: Option<&Scalar<'_>>) -> bool { + pub fn max_less_than(&self, value: &Scalar<'_>) -> bool { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - c.meta.range().1 < Some(&v.to_string()) + Column::String(c) => { + if let Scalar::String(v) = value { + if let Some(range) = c.meta.range() { + range.1 < v.to_string() } else { - panic!("invalid value"); + false } - } - None => c.meta.range().1 < None, - }, - Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { - c.meta.range().1 < *v } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); + } + } + Column::Float(c) => { + if let Scalar::Float(v) = value { + if let Some(range) = c.meta.range() { + range.1 < *v + } else { + false + } + } else { + panic!("invalid value"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { - c.meta.range().1 < *v + if let Scalar::Integer(v) = value { + if let Some(range) = c.meta.range() { + range.1 < *v + } else { + false + } } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); } } } } - pub fn min_greater_than(&self, value: Option<&Scalar<'_>>) -> bool { + // TODO(edd): consolodate with max_less_than... 
Should just be single cmp function + pub fn min_greater_than(&self, value: &Scalar<'_>) -> bool { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - c.meta.range().0 > Some(&v.to_string()) + Column::String(c) => { + if let Scalar::String(v) = value { + if let Some(range) = c.meta.range() { + range.0 > v.to_string() } else { - panic!("invalid value"); + false } - } - None => c.meta.range().0 > None, - }, - Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { - c.meta.range().0 > *v } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); + } + } + Column::Float(c) => { + if let Scalar::Float(v) = value { + if let Some(range) = c.meta.range() { + range.0 > *v + } else { + false + } + } else { + panic!("invalid value"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { - c.meta.range().0 > *v + if let Scalar::Integer(v) = value { + if let Some(range) = c.meta.range() { + range.0 > *v + } else { + false + } } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); } } } } /// Returns the minimum value contained within this column. - // FIXME(edd): Support NULL integers and floats pub fn min(&self) -> Option> { match self { - Column::String(c) => { - if let Some(min) = c.meta.range().0 { - return Some(Scalar::String(min)); - } - None - } - Column::Float(c) => Some(Scalar::Float(c.meta.range().0)), - Column::Integer(c) => Some(Scalar::Integer(c.meta.range().0)), + Column::String(c) => match c.meta.range() { + Some(range) => Some(Scalar::String(&range.0)), + None => None, + }, + Column::Float(c) => match c.meta.range() { + Some(range) => Some(Scalar::Float(range.0)), + None => None, + }, + Column::Integer(c) => match c.meta.range() { + Some(range) => Some(Scalar::Integer(range.0)), + None => None, + }, } } @@ -703,21 +725,28 @@ impl Column { // FIXME(edd): Support NULL integers and floats pub fn max(&self) -> Option> { match self { - Column::String(c) => { - if let Some(max) = c.meta.range().1 { - return Some(Scalar::String(max)); - } - None - } - Column::Float(c) => Some(Scalar::Float(c.meta.range().1)), - Column::Integer(c) => Some(Scalar::Integer(c.meta.range().1)), + Column::String(c) => match c.meta.range() { + Some(range) => Some(Scalar::String(&range.1)), + None => None, + }, + Column::Float(c) => match c.meta.range() { + Some(range) => Some(Scalar::Float(range.1)), + None => None, + }, + Column::Integer(c) => match c.meta.range() { + Some(range) => Some(Scalar::Integer(range.1)), + None => None, + }, } } pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option> { match self { Column::String(_) => unimplemented!("not implemented"), - Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), + Column::Float(c) => match c.sum_by_ids(row_ids) { + Some(sum) => Some(Scalar::Float(sum)), + None => None, + }, Column::Integer(_) => unimplemented!("not implemented"), } } @@ -734,9 +763,10 @@ impl Column { AggregateType::Count => { Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id) as u64) } - AggregateType::Sum => { - Aggregate::Sum(Scalar::Float(c.sum_by_id_range(from_row_id, to_row_id))) - } + AggregateType::Sum => match c.sum_by_id_range(from_row_id, to_row_id) { + Some(sum) => Aggregate::Sum(Some(Scalar::Float(sum))), + None => Aggregate::Sum(None), + }, }, Column::Integer(_) => unimplemented!("not implemented"), @@ -753,20 +783,25 @@ impl Column { // TODO(edd) shouldn't let roaring stuff leak out... 
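    // For example, on a float column whose metadata range is (1.0, 5.0),
    // `row_ids_eq` for the value 9.5 returns `None` immediately via
    // `maybe_contains`, and a column whose `range()` is `None` (all rows
    // NULL) can never match, so no scan happens in either case.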
pub fn row_ids_eq(&self, value: Option<&Scalar<'_>>) -> Option { + let value = match value { + Some(v) => v, + None => return None, + }; + if !self.maybe_contains(value) { return None; } self.row_ids(value, std::cmp::Ordering::Equal) } - pub fn row_ids_gt(&self, value: Option<&Scalar<'_>>) -> Option { + pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { if self.max_less_than(value) { return None; } self.row_ids(value, std::cmp::Ordering::Greater) } - pub fn row_ids_lt(&self, value: Option<&Scalar<'_>>) -> Option { + pub fn row_ids_lt(&self, value: &Scalar<'_>) -> Option { if self.min_greater_than(value) { return None; } @@ -785,9 +820,14 @@ impl Column { unimplemented!("not implemented yet"); } Column::Float(c) => { - let (col_min, col_max) = c.meta.range(); + let (col_min, col_max) = match c.meta.range() { + Some(range) => range, + // no min/max on column which means must be all NULL values. + None => return None, + }; + if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { - if *low <= col_min && *high > col_max { + if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" @@ -808,9 +848,14 @@ impl Column { } } Column::Integer(c) => { - let (col_min, col_max) = c.meta.range(); + let (col_min, col_max) = match c.meta.range() { + Some(range) => range, + // no min/max on column which means must be all NULL values. + None => return None, + }; + if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { - if *low <= col_min && *high > col_max { + if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" @@ -834,36 +879,28 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... 
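    // Worked example for the covering fast path above: a column with a
    // min/max range of (10, 20) is completely covered by the predicate
    // `5 <= v < 25` (5 <= 10 and 25 > 20), so every row id qualifies
    // without a scan, whereas `15 <= v < 18` must fall through to the
    // encoding's `row_ids_gte_lt_roaring` scan.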
- fn row_ids( - &self, - value: Option<&Scalar<'_>>, - order: std::cmp::Ordering, - ) -> Option { + fn row_ids(&self, value: &Scalar<'_>, order: std::cmp::Ordering) -> Option { match self { Column::String(c) => { if order != std::cmp::Ordering::Equal { unimplemented!("> < not supported on strings yet"); } - match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) - } else { - panic!("invalid value"); - } - } - None => Some(c.data.row_ids_eq_roaring(None)), + + if let Scalar::String(v) = value { + Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) + } else { + panic!("invalid value"); } } Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { + if let Scalar::Float(v) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { + if let Scalar::Integer(v) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); @@ -901,21 +938,21 @@ impl AggregatableByRange for &Column { } } -impl From<&[f64]> for Column { - fn from(values: &[f64]) -> Self { - Self::Float(Float::from(values)) - } -} +// impl From<&[f64]> for Column { +// fn from(values: &[f64]) -> Self { +// Self::Float(Float::from(values)) +// } +// } -impl From<&[i64]> for Column { - fn from(values: &[i64]) -> Self { - Self::Integer(Integer::from(values)) - } -} +// impl From<&[i64]> for Column { +// fn from(values: &[i64]) -> Self { +// Self::Integer(Integer::from(values)) +// } +// } #[derive(Debug, Default)] pub struct String { - meta: metadata::Str, + meta: metadata::Metadata, // TODO(edd): this would probably have multiple possible encodings data: encoding::DictionaryRLE, @@ -940,7 +977,7 @@ impl String { self.data.push_additional(s, additional); } - pub fn column_range(&self) -> (Option<&std::string::String>, Option<&std::string::String>) { + pub fn column_range(&self) -> &Option<(std::string::String, std::string::String)> { self.meta.range() } @@ -987,81 +1024,81 @@ impl std::fmt::Display for String { } } -#[derive(Debug)] -pub struct Float { - meta: metadata::F64, +// #[derive(Debug)] +// pub struct Float { +// meta: metadata::F64, - // TODO(edd): compression of float columns - // data: encoding::PlainFixed, - data: Box>, -} +// // TODO(edd): compression of float columns +// // data: encoding::PlainFixed, +// data: Box>, +// } -impl Float { - pub fn column_range(&self) -> (f64, f64) { - self.meta.range() - } +// impl Float { +// pub fn column_range(&self) -> (f64, f64) { +// self.meta.range() +// } - pub fn size(&self) -> usize { - self.meta.size() + self.data.size() - } +// pub fn size(&self) -> usize { +// self.meta.size() + self.data.size() +// } - pub fn value(&self, row_id: usize) -> f64 { - self.data.value(row_id) - } +// pub fn value(&self, row_id: usize) -> f64 { +// self.data.value(row_id) +// } - pub fn values(&self, row_ids: &[usize]) -> Vec { - self.data.values(row_ids) - } +// pub fn values(&self, row_ids: &[usize]) -> Vec { +// self.data.values(row_ids) +// } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.data.encoded_values(row_ids) - } +// pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { +// self.data.encoded_values(row_ids) +// } - pub fn all_encoded_values(&self) -> Vec { - self.data.all_encoded_values() - } +// pub fn all_encoded_values(&self) -> Vec { +// self.data.all_encoded_values() +// } - pub fn scan_from(&self, row_id: 
usize) -> &[f64] { - self.data.scan_from(row_id) - } +// pub fn scan_from(&self, row_id: usize) -> &[f64] { +// self.data.scan_from(row_id) +// } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { - self.data.sum_by_ids(row_ids) - } +// pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { +// self.data.sum_by_ids(row_ids) +// } - pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> f64 { - self.data.sum_by_id_range(from_row_id, to_row_id) - } +// pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> f64 { +// self.data.sum_by_id_range(from_row_id, to_row_id) +// } - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.data.count_by_id_range(from_row_id, to_row_id) - } -} +// pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { +// self.data.count_by_id_range(from_row_id, to_row_id) +// } +// } -impl std::fmt::Display for Float { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Meta: {}, Data: {}", self.meta, self.data) - } -} +// impl std::fmt::Display for Float { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// write!(f, "Meta: {}, Data: {}", self.meta, self.data) +// } +// } -impl From<&[f64]> for Float { - fn from(values: &[f64]) -> Self { - let len = values.len(); - let mut min = std::f64::MAX; - let mut max = std::f64::MIN; +// impl From<&[f64]> for Float { +// fn from(values: &[f64]) -> Self { +// let len = values.len(); +// let mut min = std::f64::MAX; +// let mut max = std::f64::MIN; - // calculate min/max for meta data - for v in values { - min = min.min(*v); - max = max.max(*v); - } +// // calculate min/max for meta data +// for v in values { +// min = min.min(*v); +// max = max.max(*v); +// } - Self { - meta: metadata::F64::new((min, max), len), - data: Box::new(encoding::PlainFixed::from(values)), - } - } -} +// Self { +// meta: metadata::F64::new((min, max), len), +// data: Box::new(encoding::PlainFixed::from(values)), +// } +// } +// } // use arrow::array::Array; // impl From> for Float { @@ -1089,16 +1126,93 @@ impl From<&[f64]> for Float { // } // } +// #[derive(Debug)] +// pub struct Integer { +// meta: metadata::Metadata, + +// // TODO(edd): compression of integers +// data: Box>, +// } + +// impl Integer { +// pub fn column_range(&self) -> (Option<&i64>, Option<&i64>) { +// self.meta.range() +// } + +// pub fn size(&self) -> usize { +// self.meta.size() + self.data.size() +// } + +// pub fn value(&self, row_id: usize) -> i64 { +// self.data.value(row_id) +// } + +// pub fn values(&self, row_ids: &[usize]) -> Vec { +// self.data.values(row_ids) +// } + +// pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { +// self.data.encoded_values(row_ids) +// } + +// pub fn all_encoded_values(&self) -> Vec { +// self.data.all_encoded_values() +// } + +// pub fn scan_from(&self, row_id: usize) -> &[i64] { +// self.data.scan_from(row_id) +// } + +// /// Find the first logical row that contains this value. 
+// pub fn row_id_eq_value(&self, v: i64) -> Option { +// if !self.meta.maybe_contains_value(v) { +// return None; +// } +// self.data.row_id_eq_value(v) +// } +// } + +// impl std::fmt::Display for Integer { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// write!(f, "Meta: {}, Data: {}", self.meta, self.data) +// } +// } + +// impl From<&[i64]> for Integer { +// fn from(values: &[i64]) -> Self { +// let len = values.len(); +// let mut min = std::i64::MAX; +// let mut max = std::i64::MIN; + +// // calculate min/max for meta data +// for v in values { +// min = min.min(*v); +// max = max.max(*v); +// } + +// Self { +// meta: metadata::Metadata::new((Some(min), Some(max)), len), +// data: Box::new(encoding::PlainFixed::from(values)), +// } +// } +// } + #[derive(Debug)] -pub struct Integer { - meta: metadata::I64, +pub struct NumericColumn +where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, +{ + meta: metadata::Metadata, // TODO(edd): compression of integers - data: Box>, + data: Box>, } -impl Integer { - pub fn column_range(&self) -> (i64, i64) { +impl NumericColumn +where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, +{ + pub fn column_range(&self) -> &Option<(T, T)> { self.meta.range() } @@ -1106,57 +1220,53 @@ impl Integer { self.meta.size() + self.data.size() } - pub fn value(&self, row_id: usize) -> i64 { + pub fn value(&self, row_id: usize) -> Option { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec> { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { self.data.encoded_values(row_ids) } - pub fn all_encoded_values(&self) -> Vec { + pub fn all_encoded_values(&self) -> Vec> { self.data.all_encoded_values() } - pub fn scan_from(&self, row_id: usize) -> &[i64] { + pub fn scan_from(&self, row_id: usize) -> &[Option] { self.data.scan_from(row_id) } /// Find the first logical row that contains this value. 
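    /// The column's metadata range is consulted first, so probing for a
    /// value outside the min/max (for example 99 against a range of
    /// (1, 10)) returns `None` without scanning the underlying encoding.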
- pub fn row_id_eq_value(&self, v: i64) -> Option { + pub fn row_id_eq_value(&self, v: T) -> Option { if !self.meta.maybe_contains_value(v) { return None; } self.data.row_id_eq_value(v) } -} -impl std::fmt::Display for Integer { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Meta: {}, Data: {}", self.meta, self.data) + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + self.data.sum_by_ids(row_ids) + } + + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { + self.data.sum_by_id_range(from_row_id, to_row_id) + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.data.count_by_id_range(from_row_id, to_row_id) } } -impl From<&[i64]> for Integer { - fn from(values: &[i64]) -> Self { - let len = values.len(); - let mut min = std::i64::MAX; - let mut max = std::i64::MIN; - - // calculate min/max for meta data - for v in values { - min = min.min(*v); - max = max.max(*v); - } - - Self { - meta: metadata::I64::new((min, max), len), - data: Box::new(encoding::PlainFixed::from(values)), - } +impl std::fmt::Display for NumericColumn +where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) } } @@ -1164,33 +1274,55 @@ pub mod metadata { use std::mem::size_of; #[derive(Debug, Default)] - pub struct Str { - range: (Option, Option), + pub struct Metadata + where + T: Clone + std::fmt::Debug, + { + range: Option<(T, T)>, num_rows: usize, } - impl Str { - pub fn add(&mut self, s: Option) { - self.num_rows += 1; - - if s < self.range.0 { - self.range.0 = s.clone(); - } - - if s > self.range.1 { - self.range.1 = s; + impl Metadata + where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, + { + pub fn new(range: Option<(T, T)>, rows: usize) -> Self { + Self { + range, + num_rows: rows, } } - pub fn add_repeated(&mut self, s: Option, additional: usize) { + fn update_range(&mut self, v: T) { + match self.range { + Some(range) => { + if v < range.0 { + range.0 = v; + } + + if v > range.1 { + range.1 = v; + } + } + None => { + self.range = Some((v, v)); + } + } + } + + pub fn add(&mut self, v: Option) { + self.num_rows += 1; + + if let Some(v) = v { + self.update_range(v); + } + } + + pub fn add_repeated(&mut self, v: Option, additional: usize) { self.num_rows += additional; - if s < self.range.0 { - self.range.0 = s.clone(); - } - - if s > self.range.1 { - self.range.1 = s; + if let Some(v) = v { + self.update_range(v); } } @@ -1198,114 +1330,183 @@ pub mod metadata { self.num_rows } - pub fn maybe_contains_value(&self, v: Option) -> bool { - self.range.0 <= v && v <= self.range.1 + pub fn maybe_contains_value(&self, v: T) -> bool { + match self.range { + Some(range) => range.0 <= v && v <= range.1, + None => false, + } } - pub fn range(&self) -> (Option<&String>, Option<&String>) { - (self.range.0.as_ref(), self.range.1.as_ref()) + pub fn range(&self) -> &Option<(T, T)> { + &self.range } pub fn size(&self) -> usize { // size of types for num_rows and range let base_size = size_of::() + (2 * size_of::>()); - match &self.range { - (None, None) => base_size, - (Some(min), None) => base_size + min.len(), - (None, Some(max)) => base_size + max.len(), - (Some(min), Some(max)) => base_size + min.len() + max.len(), - } + + // + // TODO: figure out a way to specify that T must be able to describe its runtime size. 
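        // One possible shape for this (a sketch, not something this code
        // defines): a small trait that types used in the range can
        // implement, e.g.
        //
        //     trait RuntimeSize {
        //         fn runtime_size(&self) -> usize;
        //     }
        //
        //     impl RuntimeSize for String {
        //         fn runtime_size(&self) -> usize {
        //             std::mem::size_of::<Self>() + self.len()
        //         }
        //     }
        //
        // With a `T: RuntimeSize` bound the match below could then add
        // `min.runtime_size() + max.runtime_size()` to `base_size` for
        // variable-width types.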
+ // + // match &self.range { + // (None, None) => base_size, + // (Some(min), None) => base_size + min.len(), + // (None, Some(max)) => base_size + max.len(), + // (Some(min), Some(max)) => base_size + min.len() + max.len(), + // } + base_size } } - impl std::fmt::Display for Str { + impl std::fmt::Display for Metadata { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Range: ({:?})", self.range) } } - #[derive(Debug, Default)] - pub struct F64 { - range: (f64, f64), - num_rows: usize, - } + // #[derive(Debug, Default)] + // pub struct Str { + // range: (Option, Option), + // num_rows: usize, + // } - impl F64 { - pub fn new(range: (f64, f64), rows: usize) -> Self { - Self { - range, - num_rows: rows, - } - } + // impl Str { + // pub fn add(&mut self, s: Option) { + // self.num_rows += 1; - pub fn maybe_contains_value(&self, v: f64) -> bool { - let res = self.range.0 <= v && v <= self.range.1; - log::debug!( - "column with ({:?}) maybe contain {:?} -- {:?}", - self.range, - v, - res - ); - res - } + // if s < self.range.0 { + // self.range.0 = s.clone(); + // } - pub fn num_rows(&self) -> usize { - self.num_rows - } + // if s > self.range.1 { + // self.range.1 = s; + // } + // } - pub fn range(&self) -> (f64, f64) { - self.range - } + // pub fn add_repeated(&mut self, s: Option, additional: usize) { + // self.num_rows += additional; - pub fn size(&self) -> usize { - size_of::() + (size_of::<(f64, f64)>()) - } - } + // if s < self.range.0 { + // self.range.0 = s.clone(); + // } - impl std::fmt::Display for F64 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Range: ({:?})", self.range) - } - } + // if s > self.range.1 { + // self.range.1 = s; + // } + // } - #[derive(Debug, Default)] - pub struct I64 { - range: (i64, i64), - num_rows: usize, - } + // pub fn num_rows(&self) -> usize { + // self.num_rows + // } - impl I64 { - pub fn new(range: (i64, i64), rows: usize) -> Self { - Self { - range, - num_rows: rows, - } - } + // pub fn maybe_contains_value(&self, v: Option) -> bool { + // self.range.0 <= v && v <= self.range.1 + // } - pub fn maybe_contains_value(&self, v: i64) -> bool { - self.range.0 <= v && v <= self.range.1 - } + // pub fn range(&self) -> (Option<&String>, Option<&String>) { + // (self.range.0.as_ref(), self.range.1.as_ref()) + // } - pub fn max(&self) -> i64 { - self.range.1 - } + // pub fn size(&self) -> usize { + // // size of types for num_rows and range + // let base_size = size_of::() + (2 * size_of::>()); + // match &self.range { + // (None, None) => base_size, + // (Some(min), None) => base_size + min.len(), + // (None, Some(max)) => base_size + max.len(), + // (Some(min), Some(max)) => base_size + min.len() + max.len(), + // } + // } + // } - pub fn num_rows(&self) -> usize { - self.num_rows - } + // impl std::fmt::Display for Str { + // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "Range: ({:?})", self.range) + // } + // } - pub fn range(&self) -> (i64, i64) { - self.range - } + // #[derive(Debug, Default)] + // pub struct F64 { + // range: (f64, f64), + // num_rows: usize, + // } - pub fn size(&self) -> usize { - size_of::() + (size_of::<(i64, i64)>()) - } - } + // impl F64 { + // pub fn new(range: (f64, f64), rows: usize) -> Self { + // Self { + // range, + // num_rows: rows, + // } + // } - impl std::fmt::Display for I64 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Range: ({:?})", self.range) - } - } + // pub fn 
maybe_contains_value(&self, v: f64) -> bool { + // let res = self.range.0 <= v && v <= self.range.1; + // log::debug!( + // "column with ({:?}) maybe contain {:?} -- {:?}", + // self.range, + // v, + // res + // ); + // res + // } + + // pub fn num_rows(&self) -> usize { + // self.num_rows + // } + + // pub fn range(&self) -> (f64, f64) { + // self.range + // } + + // pub fn size(&self) -> usize { + // size_of::() + (size_of::<(f64, f64)>()) + // } + // } + + // impl std::fmt::Display for F64 { + // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "Range: ({:?})", self.range) + // } + // } + + // #[derive(Debug, Default)] + // pub struct I64 { + // range: (i64, i64), + // num_rows: usize, + // } + + // impl I64 { + // pub fn new(range: (i64, i64), rows: usize) -> Self { + // Self { + // range, + // num_rows: rows, + // } + // } + + // pub fn maybe_contains_value(&self, v: i64) -> bool { + // self.range.0 <= v && v <= self.range.1 + // } + + // pub fn max(&self) -> i64 { + // self.range.1 + // } + + // pub fn num_rows(&self) -> usize { + // self.num_rows + // } + + // pub fn range(&self) -> (i64, i64) { + // self.range + // } + + // pub fn size(&self) -> usize { + // size_of::() + (size_of::<(i64, i64)>()) + // } + // } + + // impl std::fmt::Display for I64 { + // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "Range: ({:?})", self.range) + // } + // } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 54b90b0a72..8e10f8a17a 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -9,14 +9,14 @@ pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { type Item; fn size(&self) -> usize; - fn value(&self, row_id: usize) -> Self::Item; - fn values(&self, row_ids: &[usize]) -> Vec; - fn encoded_values(&self, row_ids: &[usize]) -> Vec; - fn all_encoded_values(&self) -> Vec; - fn scan_from(&self, row_id: usize) -> &[Self::Item]; + fn value(&self, row_id: usize) -> Option; + fn values(&self, row_ids: &[usize]) -> Vec>; + fn encoded_values(&self, row_ids: &[usize]) -> Vec>; + fn all_encoded_values(&self) -> Vec>; + fn scan_from(&self, row_id: usize) -> &[Option]; - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item; - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item; + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option; + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option; fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64; @@ -56,7 +56,7 @@ where + std::fmt::Debug + std::ops::Add, { - type Item = Option; + type Item = T::Native; fn size(&self) -> usize { self.arr.len() @@ -122,31 +122,41 @@ where // where you accept an array. let mut res = T::Native::default(); let vec = row_ids.to_vec(); + let mut non_null = false; for row_id in vec { let i = row_id as usize; if self.arr.is_null(i) { - return None; + continue; // skip NULL values } + non_null = true; res = res + self.arr.value(i); } - Some(res) + + // TODO: ghetto. + if non_null { + Some(res) + } else { + None + } } fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { - // if the column contains a null value between the range then the result - // will be None. 
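        // NULLs are skipped rather than nulling out the whole sum: for
        // example, summing [Some(1), None, Some(2)] over the full range
        // yields Some(3); only a range consisting entirely of NULLs
        // yields None.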
+ let mut res = T::Native::default(); + let mut non_null = false; + for i in from_row_id..to_row_id { if self.arr.is_null(i) { - return None; + continue; } - } - - // Otherwise sum all the values between in the range. - let mut res = T::Native::default(); - for i in from_row_id..to_row_id { + non_null = true; res = res + self.arr.value(i); } - Some(res) + + if non_null { + Some(res) + } else { + None + } } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { @@ -158,7 +168,7 @@ where } count += 1; } - count + count // if there are no non-null rows the result is 0 rather than NULL } fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { @@ -256,23 +266,23 @@ where } // get value at row_id. Panics if out of bounds. - fn value(&self, row_id: usize) -> T { - self.values[row_id] + fn value(&self, row_id: usize) -> Option { + Some(self.values[row_id]) } /// Return the decoded values for the provided logical row ids. - fn values(&self, row_ids: &[usize]) -> Vec { + fn values(&self, row_ids: &[usize]) -> Vec> { let mut out = Vec::with_capacity(row_ids.len()); for chunks in row_ids.chunks_exact(4) { - out.push(self.values[chunks[3]]); - out.push(self.values[chunks[2]]); - out.push(self.values[chunks[1]]); - out.push(self.values[chunks[0]]); + out.push(Some(self.values[chunks[3]])); + out.push(Some(self.values[chunks[2]])); + out.push(Some(self.values[chunks[1]])); + out.push(Some(self.values[chunks[0]])); } let rem = row_ids.len() % 4; for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { - out.push(self.values[i]); + out.push(Some(self.values[i])); } assert_eq!(out.len(), row_ids.len()); @@ -281,18 +291,19 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, row_ids: &[usize]) -> Vec> { self.values(row_ids) } /// Return all encoded values. For this encoding this is just the decoded /// values - fn all_encoded_values(&self) -> Vec { - self.values.clone() + fn all_encoded_values(&self) -> Vec> { + self.values.iter().map(|x| Some(*x)).collect::>() } - fn scan_from(&self, row_id: usize) -> &[T] { - &self.values[row_id..] + fn scan_from(&self, row_id: usize) -> &[Option] { + unimplemented!("this should probably take a destination vector or maybe a closure"); + // &self.values[row_id..] } /// returns a set of row ids that match a single ordering on a desired value @@ -371,12 +382,12 @@ where bm } - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { let mut res = T::default(); for v in self.values[from_row_id..to_row_id].iter() { res += *v; } - res + Some(res) } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { @@ -384,64 +395,18 @@ where } // TODO(edd): make faster - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { let mut res = T::default(); - // println!( - // "cardinality is {:?} out of {:?}", - // row_ids.cardinality(), - // self.values.len() - // ); - // HMMMMM - materialising which has a memory cost. - // let vec = row_ids.to_vec(); - // for v in vec.chunks_exact(4) { - // res += self.value(v[0] as usize); - // res += self.value(v[1] as usize); - // res += self.value(v[2] as usize); - // res += self.value(v[3] as usize); - // } - - // HMMMMM - materialising which has a memory cost. 
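        // An allocation-free alternative (a sketch; croaring's `Bitmap`
        // exposes `iter()` over u32 row ids) would be:
        //
        //     let mut res = T::default();
        //     for row_id in row_ids.iter() {
        //         res += self.values[row_id as usize];
        //     }
        //
        // which trades the Vec allocation for per-item iterator overhead.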
+ // Consider accepting a vec of ids if those ids need to be used again + // across other columns. let vec = row_ids.to_vec(); for v in vec { - res += self.value(v as usize); + // Todo(edd): this could benefit from unrolling (maybe) + res += self.values[v as usize]; } - // for v in row_ids.iter() { - // res += self.value(v as usize); - // } - - // let step = 16_u64; - // for i in (0..self.values.len() as u64).step_by(step as usize) { - // if row_ids.contains_range(i..i + step) { - // res += self.value(i as usize + 15); - // res += self.value(i as usize + 14); - // res += self.value(i as usize + 13); - // res += self.value(i as usize + 12); - // res += self.value(i as usize + 11); - // res += self.value(i as usize + 10); - // res += self.value(i as usize + 9); - // res += self.value(i as usize + 8); - // res += self.value(i as usize + 7); - // res += self.value(i as usize + 6); - // res += self.value(i as usize + 5); - // res += self.value(i as usize + 4); - // res += self.value(i as usize + 3); - // res += self.value(i as usize + 2); - // res += self.value(i as usize + 1); - // res += self.value(i as usize); - // continue; - // } - - // for j in i..i + step { - // if row_ids.contains(j as u32) { - // res += self.value(j as usize); - // } - // } - // } - - // row_ids.iter().for_each(|x| res += self.value(x as usize)); - res + Some(res) } fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { @@ -453,7 +418,6 @@ impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { Self { values: v.to_vec(), - // buf: Vec::with_capacity(v.len()), total_order: false, size: size_of::>() + (size_of::() * v.len()) @@ -467,7 +431,6 @@ impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { Self { values: v.to_vec(), - // buf: Vec::with_capacity(v.len()), total_order: false, size: size_of::>() + (size_of::() * v.len()) From 1968b654ccb0b9412248fb7c07807f51eb5bee7d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 14 Sep 2020 12:32:49 +0100 Subject: [PATCH 56/73] refactor: fix vector support --- delorean_mem_qe/src/column.rs | 168 +++++++++++++++++++++++++--------- 1 file changed, 123 insertions(+), 45 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 4faffdc4da..ccc2f053a2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -2,6 +2,13 @@ use std::convert::From; use super::encoding; +#[derive(Debug, PartialEq, PartialOrd, Clone)] +pub enum Value<'a> { + Null, + String(&'a str), + Scalar(Scalar<'a>), +} + #[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Scalar<'a> { String(&'a str), @@ -162,49 +169,94 @@ pub enum Vector<'a> { EncodedString(Vec), Float(Vec>), Integer(Vec>), + // TODO(edd): add types like this: + // + // Integer16(Vec), + // NullInteger16(Vec>), // contains one or more NULL values + // ... + // ... 
+ // + // We won't need EncodedString then (it can use one of the non-null integer variants) + // } impl<'a> Vector<'a> { - // pub fn aggregate_by_id_range( - // &self, - // agg_type: &AggregateType, - // from_row_id: usize, - // to_row_id: usize, - // ) -> Aggregate<'a> { - // match agg_type { - // AggregateType::Count => { - // Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) - // } - // AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), - // } - // } + pub fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate<'a> { + match agg_type { + AggregateType::Count => { + Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + } + AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), + } + } - // fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { - // match self { - // Vector::String(_) => { - // panic!("can't sum strings...."); - // } - // Vector::Float(values) => { - // let mut res = 0.0; - // // TODO(edd): check asm to see if it's vectorising - // for v in values[from_row_id..to_row_id].iter() { - // res += *v; - // } - // Scalar::Float(res) - // } - // Vector::Integer(values) => { - // let mut res = 0; - // // TODO(edd): check asm to see if it's vectorising - // for v in values[from_row_id..to_row_id].iter() { - // res += *v; - // } - // Scalar::Integer(res) - // } - // } - // } + // Return the sum of values in the vector. NULL values are ignored. If there + // are no non-null values in the vector being summed then None is returned. + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option> { + match self { + Vector::String(_) => { + panic!("can't sum strings...."); + } + Vector::Float(values) => { + let mut res = 0.0; + let mut found = false; + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + if let Some(v) = v { + res += *v; + found = true; + } + } + + if found { + return Some(Scalar::Float(res)); + } + None + } + Vector::Integer(values) => { + let mut res = 0; + let mut found = false; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + if let Some(v) = v { + res += *v; + found = true; + } + } + + if found { + return Some(Scalar::Integer(res)); + } + None + } + Vector::EncodedString(values) => { + let mut res = 0; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Some(Scalar::Integer(res)) + } + } + } + + // return the count of values on the column. NULL values do not contribute + // to the count. 
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - to_row_id - from_row_id + match self { + Vector::String(vec) => vec.iter().filter(|x| x.is_some()).count(), + Vector::EncodedString(_) => to_row_id - from_row_id, // fast - no possible NULL values + Vector::Float(vec) => vec.iter().filter(|x| x.is_some()).count(), + Vector::Integer(vec) => vec.iter().filter(|x| x.is_some()).count(), + } } pub fn extend(&mut self, other: Self) { @@ -230,6 +282,13 @@ impl<'a> Vector<'a> { unreachable!("string can't be extended"); } } + Vector::EncodedString(v) => { + if let Self::EncodedString(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } } } @@ -242,15 +301,27 @@ impl<'a> Vector<'a> { Self::String(v) => v.len(), Self::Float(v) => v.len(), Self::Integer(v) => v.len(), + Vector::EncodedString(v) => v.len(), } } - pub fn get(&self, i: usize) -> Scalar<'a> { + /// Return the value within the vector at position `i`. If the value at + /// position `i` is NULL then `None` is returned. + pub fn get(&self, i: usize) -> Value<'a> { match self { - // FIXME(edd): SORT THIS OPTION OUT - Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), - Self::Float(v) => Scalar::Float(v[i]), - Self::Integer(v) => Scalar::Integer(v[i]), + Self::String(v) => match v[i] { + Some(v) => Value::String(v), + None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), + }, + Self::Float(v) => match v[i] { + Some(v) => Value::Scalar(Scalar::Float(v)), + None => Value::Null, + }, + Self::Integer(v) => match v[i] { + Some(v) => Value::Scalar(Scalar::Integer(v)), + None => Value::Null, + }, + Self::EncodedString(v) => Value::Scalar(Scalar::Integer(v[i])), } } @@ -265,6 +336,7 @@ impl<'a> Vector<'a> { Self::Integer(v) => { v.swap(a, b); } + Vector::EncodedString(v) => v.swap(a, b), } } } @@ -293,7 +365,7 @@ impl<'a> VectorIterator<'a> { } } impl<'a> Iterator for VectorIterator<'a> { - type Item = Scalar<'a>; + type Item = Value<'a>; fn next(&mut self) -> Option { let curr_i = self.next_i; @@ -316,11 +388,17 @@ impl<'a> std::fmt::Display for Vector<'a> { Self::Float(v) => write!(f, "{:?}", v), Self::Integer(v) => { for x in v.iter() { - let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); - write!(f, "{}, ", ts)?; + match x { + Some(x) => { + let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); + write!(f, "{}, ", ts)?; + } + None => write!(f, "NULL, ")?, + } } Ok(()) } + Vector::EncodedString(v) => write!(f, "{:?}", v), } } } From a107da6dfe8be6e630b71338895ecb7a2eac10a0 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 15 Sep 2020 13:21:44 +0100 Subject: [PATCH 57/73] refactor: temp add not null materialised vectors --- delorean_mem_qe/src/encoding.rs | 67 +++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 8e10f8a17a..1945ae3224 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -11,14 +11,16 @@ pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { fn size(&self) -> usize; fn value(&self, row_id: usize) -> Option; fn values(&self, row_ids: &[usize]) -> Vec>; - fn encoded_values(&self, row_ids: &[usize]) -> Vec>; - fn all_encoded_values(&self) -> Vec>; + + fn encoded_values(&self, row_ids: &[usize]) -> Vec; + fn all_encoded_values(&self) -> Vec; + fn scan_from(&self, row_id: usize) -> &[Option]; fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option; fn 
sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option; - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64; fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64; fn row_id_eq_value(&self, v: Self::Item) -> Option; @@ -82,20 +84,31 @@ where out } - /// Well this is terribly slow - fn encoded_values(&self, row_ids: &[usize]) -> Vec> { - self.values(row_ids) + /// encoded_values returns encoded values for the encoding. If the encoding + /// supports null values then the values returned are undefined. + /// + /// encoded_values should not be called on nullable columns. + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + // assertion here during development to check this isn't called on + // encodings that can have null values. + assert_eq!(self.arr.null_count(), 0); + + let mut out = Vec::with_capacity(row_ids.len()); + for &row_id in row_ids { + out.push(self.arr.value(row_id)); + } + assert_eq!(out.len(), row_ids.len()); + out } - /// TODO(edd): there must be a more efficient way. - fn all_encoded_values(&self) -> Vec> { + fn all_encoded_values(&self) -> Vec { + // assertion here during development to check this isn't called on + // encodings that can have null values. + assert_eq!(self.arr.null_count(), 0); + let mut out = Vec::with_capacity(self.arr.len()); for i in 0..self.arr.len() { - if self.arr.is_null(i) { - out.push(None) - } else { - out.push(Some(self.arr.value(i))) - } + out.push(self.arr.value(i)); } assert_eq!(out.len(), self.arr.len()); out @@ -159,7 +172,7 @@ where } } - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { // TODO - count values that are not null in the row range. let mut count = 0; for i in from_row_id..to_row_id { @@ -291,14 +304,28 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - fn encoded_values(&self, row_ids: &[usize]) -> Vec> { - self.values(row_ids) + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + let mut out = Vec::with_capacity(row_ids.len()); + for chunks in row_ids.chunks_exact(4) { + out.push(self.values[chunks[3]]); + out.push(self.values[chunks[2]]); + out.push(self.values[chunks[1]]); + out.push(self.values[chunks[0]]); + } + + let rem = row_ids.len() % 4; + for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { + out.push(self.values[i]); + } + + assert_eq!(out.len(), row_ids.len()); + out } /// Return all encoded values. For this encoding this is just the decoded /// values - fn all_encoded_values(&self) -> Vec> { - self.values.iter().map(|x| Some(*x)).collect::>() + fn all_encoded_values(&self) -> Vec { + self.values.clone() // TODO(edd):perf probably can return reference to vec. 
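        // For example, a borrowing signature such as
        // `fn all_encoded_values(&self) -> &[Self::Item]` would avoid the
        // copy here, at the cost of tying the result's lifetime to `&self`;
        // encodings that materialise values on the fly (like the
        // Arrow-backed one above) would then need somewhere to cache them.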
} fn scan_from(&self, row_id: usize) -> &[Option] { @@ -390,8 +417,8 @@ where Some(res) } - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - to_row_id - from_row_id + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { + (to_row_id - from_row_id) as u64 } // TODO(edd): make faster From ba39d731e0ac25cb05d410aa42136a8402e90a8b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 16 Sep 2020 16:59:00 +0100 Subject: [PATCH 58/73] refactor: get build working --- delorean_mem_qe/src/column.rs | 415 +++++++++++++++++++++++++------- delorean_mem_qe/src/encoding.rs | 42 +--- delorean_mem_qe/src/segment.rs | 118 +++++---- delorean_mem_qe/src/sorter.rs | 13 +- 4 files changed, 416 insertions(+), 172 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index ccc2f053a2..3b27743c2b 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -14,6 +14,7 @@ pub enum Scalar<'a> { String(&'a str), Float(f64), Integer(i64), + Unsigned32(u32), } impl<'a> Scalar<'a> { @@ -28,6 +29,9 @@ impl<'a> Scalar<'a> { Scalar::Integer(v) => { *v = 0; } + Scalar::Unsigned32(v) => { + *v = 0; + } } } @@ -47,6 +51,13 @@ impl<'a> Scalar<'a> { panic!("invalid"); }; } + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = other { + *v += other; + } else { + panic!("invalid"); + }; + } Self::String(_) => { unreachable!("not possible to add strings"); } @@ -54,6 +65,39 @@ impl<'a> Scalar<'a> { } } +impl<'a> std::ops::Add<&Scalar<'a>> for &mut Scalar<'a> { + type Output = Scalar<'a>; + + fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + match *self { + Scalar::Float(v) => { + if let Scalar::Float(other) = _rhs { + Scalar::Float(v + other) + } else { + panic!("invalid"); + } + } + Scalar::Integer(v) => { + if let Scalar::Integer(other) = _rhs { + Scalar::Integer(v + other) + } else { + panic!("invalid"); + } + } + Scalar::Unsigned32(v) => { + if let Scalar::Unsigned32(other) = _rhs { + Scalar::Unsigned32(v + other) + } else { + panic!("invalid"); + } + } + Scalar::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { type Output = Scalar<'a>; @@ -73,6 +117,13 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); } } + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = _rhs { + Self::Unsigned32(v + other) + } else { + panic!("invalid"); + } + } Self::String(_) => { unreachable!("not possible to add strings"); } @@ -80,6 +131,37 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { } } +impl<'a> std::ops::AddAssign<&Scalar<'a>> for &mut Scalar<'a> { + fn add_assign(&mut self, _rhs: &Scalar<'a>) { + match self { + Scalar::Float(v) => { + if let Scalar::Float(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Scalar::Integer(v) => { + if let Scalar::Integer(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Scalar::Unsigned32(v) => { + if let Scalar::Unsigned32(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Scalar::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { fn add_assign(&mut self, _rhs: &Scalar<'a>) { match self { @@ -97,6 +179,13 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); }; } + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } 
Self::String(_) => {
                unreachable!("not possible to add strings");
            }
@@ -107,6 +196,8 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> {
 #[derive(Clone, Debug)]
 pub enum Aggregate<'a> {
     Count(u64),
+    // Sum can be `None` if, for example, all values being aggregated are themselves
+    // `None`.
     Sum(Option<Scalar<'a>>),
 }

@@ -116,44 +207,71 @@ pub enum AggregateType {
     Sum,
 }

-// impl<'a> std::ops::Add<Scalar<'a>> for Aggregate<'a> {
+// impl<'a> std::ops::Add<&Option<Scalar<'a>>> for Aggregate<'a> {
 //     type Output = Aggregate<'a>;

-//     fn add(self, _rhs: Scalar<'a>) -> Self::Output {
+//     fn add(self, _rhs: &Option<Scalar<'a>>) -> Self::Output {
 //         match self {
-//             Self::Count(c) => Self::Count(c + 1),
-//             Self::Sum(s) => Self::Sum(s + &_rhs),
+//             Self::Count(self_count) => match _rhs {
+//                 Some(other_scalar) => match other_scalar {
+//                     Scalar::String(_) => panic!("todo - remove String scalar"),
+//                     Scalar::Float(_) => panic!("cannot add floating point value to a count"),
+//                     Scalar::Integer(v) => Self::Count(self_count + *v as u64),
+//                     Scalar::Unsigned32(v) => Self::Count(self_count + *v as u64),
+//                 },
+//                 None => self,
+//             },
+//             // SUM ignores NULL values. Initially an aggregate sum is `None`, but
+//             // as soon as a non-null value is shown then it becomes `Some`.
+//             Self::Sum(self_sum) => match (self_sum, _rhs) {
+//                 (None, None) => Self::Sum(None),
+//                 (None, Some(other_scalar)) => match other_scalar {
+//                     Scalar::String(_) => panic!("todo - remove String scalar"),
+//                     Scalar::Float(_) => Self::Sum(Some(other_scalar.clone())),
+//                     Scalar::Integer(_) => Self::Sum(Some(other_scalar.clone())),
+//                     Scalar::Unsigned32(_) => Self::Sum(Some(other_scalar.clone())),
+//                 },
+//                 (Some(_self), None) => Self::Sum(Some(_self.clone())),
+//                 (Some(self_scalar), Some(other_scalar)) => match other_scalar {
+//                     Scalar::String(_) => panic!("todo - remove String scalar"),
+//                     Scalar::Float(_) => Self::Sum(Some(self_scalar + &other_scalar)),
+//                     Scalar::Integer(_) => Self::Sum(Some(self_scalar + &other_scalar)),
+//                     Scalar::Unsigned32(_) => Self::Sum(Some(self_scalar + &other_scalar)),
+//                 },
+//             },
 //     }
 // }

-impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> {
-    type Output = Aggregate<'a>;
+// impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> {
+//     type Output = Aggregate<'a>;

-    fn add(self, _rhs: &Aggregate<'a>) -> Self::Output {
-        match self {
-            Self::Count(c) => {
-                if let Self::Count(other) = _rhs {
-                    Self::Count(c + other)
-                } else {
-                    panic!("invalid");
-                }
-            }
-            Self::Sum(s) => {
-                if let Self::Sum(other) = _rhs {
-                    match (s, other) {
-                        (None, None) => Self::Sum(None),
-                        (None, Some(other)) => Self::Sum(Some(*other)),
-                        (Some(s), None) => Self::Sum(Some(s)),
-                        (Some(s), Some(other)) => Self::Sum(Some(s + other)),
-                    }
-                } else {
-                    panic!("invalid");
-                }
-            }
-        }
-    }
-}
+// fn add(self, _rhs: &Aggregate<'a>) -> Self::Output {
+//     match self {
+//         Self::Count(self_count) => {
+//             if let Self::Count(other) = _rhs {
+//                 Self::Count(self_count + *other)
+//             } else {
+//                 panic!("can't combine count with other aggregate type");
+//             }
+//         }
+//         // SUM ignores NULL values. Initially an aggregate sum is `None`, but
+//         // as soon as a non-null value is shown then it becomes `Some`.
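
// Aside: the NULL-handling rule described in the comment above, reduced to a
// runnable sketch over plain i64 values (the real code folds Scalar values):

    fn sum_ignoring_nulls(values: &[Option<i64>]) -> Option<i64> {
        values.iter().fold(None, |acc, v| match (acc, v) {
            (acc, None) => acc,                  // NULL contributes nothing
            (None, Some(v)) => Some(*v),         // first non-null value seeds the sum
            (Some(sum), Some(v)) => Some(sum + v),
        })
    }

    // sum_ignoring_nulls(&[None, None])             == None
    // sum_ignoring_nulls(&[None, Some(3), Some(4)]) == Some(7)
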
+// Self::Sum(self_sum) => { +// if let Self::Sum(other) = _rhs { +// match (self_sum, other) { +// (None, None) => Self::Sum(None), +// (None, Some(_)) => Self::Sum(*other), +// (Some(_), None) => self, +// (Some(s), Some(other)) => Self::Sum(Some(s + other)), +// } +// } else { +// panic!("invalid"); +// } +// } +// } +// } +// } pub trait AggregatableByRange { fn aggregate_by_id_range( @@ -163,12 +281,16 @@ pub trait AggregatableByRange { to_row_id: usize, ) -> Aggregate<'_>; } + /// A Vector is a materialised vector of values from a column. pub enum Vector<'a> { - String(Vec<&'a Option>), - EncodedString(Vec), - Float(Vec>), - Integer(Vec>), + NullString(Vec<&'a Option>), + NullFloat(Vec>), + NullInteger(Vec>), + + Float(Vec), + Integer(Vec), + Unsigned32(Vec), // TODO(edd): add types like this: // // Integer16(Vec), @@ -189,7 +311,7 @@ impl<'a> Vector<'a> { ) -> Aggregate<'a> { match agg_type { AggregateType::Count => { - Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id)) } AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), } @@ -199,12 +321,12 @@ impl<'a> Vector<'a> { // are no non-null values in the vector being summed then None is returned. fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option> { match self { - Vector::String(_) => { + Self::NullString(_) => { panic!("can't sum strings...."); } - Vector::Float(values) => { + Self::NullFloat(values) => { let mut res = 0.0; - let mut found = false; + let mut found = false; // TODO(edd): check if this is faster than a match. // TODO(edd): check asm to see if it's vectorising for v in values[from_row_id..to_row_id].iter() { @@ -219,7 +341,16 @@ impl<'a> Vector<'a> { } None } - Vector::Integer(values) => { + Self::Float(values) => { + let mut res = 0.0; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Some(Scalar::Float(res)) + } + Self::NullInteger(values) => { let mut res = 0; let mut found = false; @@ -236,7 +367,7 @@ impl<'a> Vector<'a> { } None } - Vector::EncodedString(values) => { + Self::Integer(values) => { let mut res = 0; // TODO(edd): check asm to see if it's vectorising @@ -245,24 +376,58 @@ impl<'a> Vector<'a> { } Some(Scalar::Integer(res)) } + Self::Unsigned32(values) => { + let mut res = 0; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Some(Scalar::Unsigned32(res)) + } } } // return the count of values on the column. NULL values do not contribute // to the count. 
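
// Aside: note that the nullable arms in the hunk below count `Some` values
// across the whole vector and ignore `from_row_id`/`to_row_id`; PATCH 60 later
// rescopes the scan to the requested range. The intended behaviour, sketched:

    fn count_nullable<T>(values: &[Option<T>], from: usize, to: usize) -> u64 {
        values[from..to].iter().filter(|v| v.is_some()).count() as u64
    }

    // Dense (non-null) variants need no scan at all:
    fn count_dense(from: usize, to: usize) -> u64 {
        (to - from) as u64
    }
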
- fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { match self { - Vector::String(vec) => vec.iter().filter(|x| x.is_some()).count(), - Vector::EncodedString(_) => to_row_id - from_row_id, // fast - no possible NULL values - Vector::Float(vec) => vec.iter().filter(|x| x.is_some()).count(), - Vector::Integer(vec) => vec.iter().filter(|x| x.is_some()).count(), + Self::NullString(vec) => { + let count = vec.iter().filter(|x| x.is_some()).count(); + count as u64 + } + Self::NullFloat(vec) => { + let count = vec.iter().filter(|x| x.is_some()).count(); + count as u64 + } + Self::NullInteger(vec) => { + let count = vec.iter().filter(|x| x.is_some()).count(); + count as u64 + } + Self::Float(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values + Self::Integer(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values + Self::Unsigned32(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values } } pub fn extend(&mut self, other: Self) { match self { - Self::String(v) => { - if let Self::String(other) = other { + Self::NullString(v) => { + if let Self::NullString(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::NullFloat(v) => { + if let Self::NullFloat(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::NullInteger(v) => { + if let Self::NullInteger(other) = other { v.extend(other); } else { unreachable!("string can't be extended"); @@ -282,8 +447,8 @@ impl<'a> Vector<'a> { unreachable!("string can't be extended"); } } - Vector::EncodedString(v) => { - if let Self::EncodedString(other) = other { + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = other { v.extend(other); } else { unreachable!("string can't be extended"); @@ -298,10 +463,12 @@ impl<'a> Vector<'a> { pub fn len(&self) -> usize { match self { - Self::String(v) => v.len(), + Self::NullString(v) => v.len(), + Self::NullFloat(v) => v.len(), + Self::NullInteger(v) => v.len(), Self::Float(v) => v.len(), Self::Integer(v) => v.len(), - Vector::EncodedString(v) => v.len(), + Self::Unsigned32(v) => v.len(), } } @@ -309,25 +476,54 @@ impl<'a> Vector<'a> { /// position `i` is NULL then `None` is returned. pub fn get(&self, i: usize) -> Value<'a> { match self { - Self::String(v) => match v[i] { + Self::NullString(v) => match v[i] { Some(v) => Value::String(v), None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), }, - Self::Float(v) => match v[i] { + Self::NullFloat(v) => match v[i] { Some(v) => Value::Scalar(Scalar::Float(v)), None => Value::Null, }, - Self::Integer(v) => match v[i] { + Self::NullInteger(v) => match v[i] { Some(v) => Value::Scalar(Scalar::Integer(v)), None => Value::Null, }, - Self::EncodedString(v) => Value::Scalar(Scalar::Integer(v[i])), + Self::Float(v) => Value::Scalar(Scalar::Float(v[i])), + Self::Integer(v) => Value::Scalar(Scalar::Integer(v[i])), + Self::Unsigned32(v) => Value::Scalar(Scalar::Unsigned32(v[i])), + } + } + + /// Return the value within the vector at position `i`. If the value at + /// position `i` is NULL then `None` is returned. 
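
// Aside: the accessor pattern used by `get` and `get_scalar`: a NULL slot maps
// to a sentinel (Value::Null or None) rather than panicking. Reduced sketch,
// with i64 standing in for the full set of scalar types:

    enum Value<'a> {
        Null,
        Scalar(&'a i64),
    }

    fn get<'a>(values: &'a [Option<i64>], i: usize) -> Value<'a> {
        match &values[i] {
            Some(v) => Value::Scalar(v),
            None => Value::Null,
        }
    }
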
+ // + // TODO - sort out + pub fn get_scalar(&self, i: usize) -> Option> { + match self { + Self::NullString(_) => panic!("unsupported get_scalar"), + Self::NullFloat(v) => match v[i] { + Some(v) => Some(Scalar::Float(v)), + None => None, + }, + Self::NullInteger(v) => match v[i] { + Some(v) => Some(Scalar::Integer(v)), + None => None, + }, + Self::Float(v) => Some(Scalar::Float(v[i])), + Self::Integer(v) => Some(Scalar::Integer(v[i])), + Self::Unsigned32(v) => Some(Scalar::Unsigned32(v[i])), } } pub fn swap(&mut self, a: usize, b: usize) { match self { - Self::String(v) => { + Self::NullString(v) => { + v.swap(a, b); + } + Self::NullFloat(v) => { + v.swap(a, b); + } + Self::NullInteger(v) => { v.swap(a, b); } Self::Float(v) => { @@ -336,7 +532,9 @@ impl<'a> Vector<'a> { Self::Integer(v) => { v.swap(a, b); } - Vector::EncodedString(v) => v.swap(a, b), + Self::Unsigned32(v) => { + v.swap(a, b); + } } } } @@ -352,8 +550,6 @@ impl AggregatableByRange for &Vector<'_> { } } -/// VectorIterator allows a `Vector` to be iterated. Until vectors are drained -/// Scalar values are emitted. pub struct VectorIterator<'a> { v: &'a Vector<'a>, next_i: usize, @@ -379,14 +575,44 @@ impl<'a> Iterator for VectorIterator<'a> { } } +/// NullVectorIterator allows a `Vector` to be iterated. Until vectors are +/// drained Scalar values are emitted. +/// +/// +/// TODO - need to figure this out - currently only returns scalars +pub struct NullVectorIterator<'a> { + v: &'a Vector<'a>, + next_i: usize, +} + +impl<'a> NullVectorIterator<'a> { + pub fn new(v: &'a Vector<'a>) -> Self { + Self { v, next_i: 0 } + } +} +impl<'a> Iterator for NullVectorIterator<'a> { + type Item = Option>; + + fn next(&mut self) -> Option { + let curr_i = self.next_i; + self.next_i += 1; + + if curr_i == self.v.len() { + return None; + } + + Some(self.v.get_scalar(curr_i)) + } +} + use chrono::prelude::*; impl<'a> std::fmt::Display for Vector<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::String(v) => write!(f, "{:?}", v), - Self::Float(v) => write!(f, "{:?}", v), - Self::Integer(v) => { + Self::NullString(v) => write!(f, "{:?}", v), + Self::NullFloat(v) => write!(f, "{:?}", v), + Self::NullInteger(v) => { for x in v.iter() { match x { Some(x) => { @@ -398,7 +624,16 @@ impl<'a> std::fmt::Display for Vector<'a> { } Ok(()) } - Vector::EncodedString(v) => write!(f, "{:?}", v), + Self::Float(v) => write!(f, "{:?}", v), + Self::Integer(v) => { + // TODO(edd) remove as this is timestamp specific + for x in v.iter() { + let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); + write!(f, "{}, ", ts)?; + } + Ok(()) + } + Self::Unsigned32(v) => write!(f, "{:?}", v), } } } @@ -474,10 +709,10 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - return Vector::String(vec![]); + return Vector::NullString(vec![]); } - Vector::String(c.values(row_ids)) + Vector::NullString(c.values(row_ids)) } Column::Float(c) => { if row_ids.is_empty() { @@ -488,7 +723,7 @@ impl Column { let v = c.values(row_ids); log::debug!("time getting decoded values for float {:?}", now.elapsed()); - Vector::Float(v) + Vector::NullFloat(v) } Column::Integer(c) => { if row_ids.is_empty() { @@ -498,7 +733,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.values(row_ids); log::debug!("time getting decoded values for int {:?}", now.elapsed()); - Vector::Integer(v) + Vector::NullInteger(v) } } } @@ -509,7 +744,7 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - 
return Vector::String(vec![]); + return Vector::NullString(vec![]); } let row_id_vec = row_ids @@ -517,7 +752,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - Vector::String(c.values(&row_id_vec)) + Vector::NullString(c.values(&row_id_vec)) } Column::Float(c) => { if row_ids.is_empty() { @@ -529,7 +764,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - Vector::Float(c.values(&row_id_vec)) + Vector::NullFloat(c.values(&row_id_vec)) } Column::Integer(c) => { if row_ids.is_empty() { @@ -541,7 +776,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - Vector::Integer(c.values(&row_id_vec)) + Vector::NullInteger(c.values(&row_id_vec)) } } } @@ -560,13 +795,13 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - return Vector::Integer(vec![]); + return Vector::Unsigned32(vec![]); } let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids_vec); log::debug!("time getting encoded values {:?}", now.elapsed()); - Vector::EncodedString(v) + Vector::Unsigned32(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -591,7 +826,7 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - return Vector::Integer(vec![]); + return Vector::Unsigned32(vec![]); } let now = std::time::Instant::now(); @@ -599,7 +834,7 @@ impl Column { log::debug!("time getting encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::EncodedString(v) + Vector::Unsigned32(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -627,7 +862,7 @@ impl Column { log::debug!("time getting all encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::EncodedString(v) + Vector::Unsigned32(v) } Column::Float(c) => Vector::Float(c.all_encoded_values()), Column::Integer(c) => Vector::Integer(c.all_encoded_values()), @@ -671,9 +906,9 @@ impl Column { row_ids_vec[0] ); match self { - Column::String(c) => Vector::String(c.values(&row_ids_vec)), - Column::Float(c) => Vector::Float(c.values(&row_ids_vec)), - Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), + Column::String(c) => Vector::NullString(c.values(&row_ids_vec)), + Column::Float(c) => Vector::NullFloat(c.values(&row_ids_vec)), + Column::Integer(c) => Vector::NullInteger(c.values(&row_ids_vec)), } } @@ -839,7 +1074,7 @@ impl Column { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match agg_type { AggregateType::Count => { - Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id) as u64) + Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id)) } AggregateType::Sum => match c.sum_by_id_range(from_row_id, to_row_id) { Some(sum) => Aggregate::Sum(Some(Scalar::Float(sum))), @@ -1071,11 +1306,11 @@ impl String { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } - pub fn all_encoded_values(&self) -> Vec { + pub fn all_encoded_values(&self) -> Vec { self.data.all_encoded_values() } @@ -1288,7 +1523,7 @@ where impl NumericColumn where - T: Clone + std::cmp::PartialOrd + std::fmt::Debug, + T: Copy + Clone + std::cmp::PartialOrd + std::fmt::Debug, { pub fn column_range(&self) -> &Option<(T, T)> { self.meta.range() @@ -1306,11 +1541,11 @@ where self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } - pub fn 
all_encoded_values(&self) -> Vec> { + pub fn all_encoded_values(&self) -> Vec { self.data.all_encoded_values() } @@ -1334,7 +1569,7 @@ where self.data.sum_by_id_range(from_row_id, to_row_id) } - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { self.data.count_by_id_range(from_row_id, to_row_id) } } @@ -1372,10 +1607,10 @@ pub mod metadata { } fn update_range(&mut self, v: T) { - match self.range { + match &mut self.range { Some(range) => { if v < range.0 { - range.0 = v; + range.0 = v.clone(); } if v > range.1 { @@ -1383,7 +1618,7 @@ pub mod metadata { } } None => { - self.range = Some((v, v)); + self.range = Some((v.clone(), v)); } } } @@ -1409,7 +1644,7 @@ pub mod metadata { } pub fn maybe_contains_value(&self, v: T) -> bool { - match self.range { + match &self.range { Some(range) => range.0 <= v && v <= range.1, None => false, } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 1945ae3224..48f00182ae 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -89,29 +89,11 @@ where /// /// encoded_values should not be called on nullable columns. fn encoded_values(&self, row_ids: &[usize]) -> Vec { - // assertion here during development to check this isn't called on - // encodings that can have null values. - assert_eq!(self.arr.null_count(), 0); - - let mut out = Vec::with_capacity(row_ids.len()); - for &row_id in row_ids { - out.push(self.arr.value(row_id)); - } - assert_eq!(out.len(), row_ids.len()); - out + panic!("encoded_values not implemented yet"); } fn all_encoded_values(&self) -> Vec { - // assertion here during development to check this isn't called on - // encodings that can have null values. - assert_eq!(self.arr.null_count(), 0); - - let mut out = Vec::with_capacity(self.arr.len()); - for i in 0..self.arr.len() { - out.push(self.arr.value(i)); - } - assert_eq!(out.len(), self.arr.len()); - out + panic!("all_encoded_values not implemented yet"); } // TODO(edd): problem here is returning a slice because we need to own the @@ -304,7 +286,7 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, row_ids: &[usize]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); for chunks in row_ids.chunks_exact(4) { out.push(self.values[chunks[3]]); @@ -324,7 +306,7 @@ where /// Return all encoded values. For this encoding this is just the decoded /// values - fn all_encoded_values(&self) -> Vec { + fn all_encoded_values(&self) -> Vec { self.values.clone() // TODO(edd):perf probably can return reference to vec. } @@ -723,8 +705,8 @@ impl DictionaryRLE { /// /// TODO(edd): return type is wrong but I'm making it fit /// - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { - let mut out: Vec = Vec::with_capacity(row_ids.len()); + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + let mut out = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -746,7 +728,7 @@ impl DictionaryRLE { } // this entry covers the row_id we want. - out.push(curr_entry_id as i64); + out.push(curr_entry_id as u32); curr_logical_row_id += 1; curr_entry_rl -= 1; } @@ -757,11 +739,11 @@ impl DictionaryRLE { // all_encoded_values materialises a vector of all encoded values for the // column. 
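
// Aside: for the RLE encoding this materialisation is just run expansion: each
// (entry id, run length) pair becomes `run length` copies of the id. A
// self-contained sketch of the loop in the hunk below:

    use std::iter;

    fn expand_runs(run_lengths: &[(usize, u64)], total: usize) -> Vec<u32> {
        let mut out = Vec::with_capacity(total);
        for (idx, rl) in run_lengths {
            out.extend(iter::repeat(*idx as u32).take(*rl as usize));
        }
        out
    }

    // expand_runs(&[(0, 3), (1, 2)], 5) == vec![0, 0, 0, 1, 1]
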
- pub fn all_encoded_values(&self) -> Vec { - let mut out: Vec = Vec::with_capacity(self.total as usize); + pub fn all_encoded_values(&self) -> Vec { + let mut out = Vec::with_capacity(self.total as usize); for (idx, rl) in &self.run_lengths { - out.extend(iter::repeat(*idx as i64).take(*rl as usize)); + out.extend(iter::repeat(*idx as u32).take(*rl as usize)); } out } @@ -863,8 +845,8 @@ mod test { arr: super::PrimitiveArray::from(vec![Some(2.3), Some(44.56), None]), }; - let encoded = col.all_encoded_values(); - assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); + // let encoded = col.all(); + // assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); let sum = col.sum_by_id_range(0, 1); assert_eq!(sum, Some(46.86)); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 9255817eaa..07bc024145 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -78,7 +78,8 @@ impl Segment { // TODO(edd) yuk if name == "time" { if let column::Column::Integer(ts) = &c { - self.meta.time_range = ts.column_range(); + // Right now assumption is ts column has some non-null values + self.meta.time_range = ts.column_range().unwrap(); } else { panic!("incorrect column type for time"); } @@ -316,12 +317,10 @@ impl Segment { // filtering stage we will just emit None. let mut group_itrs = group_column_encoded_values .iter() - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } + .map(|vector| match vector { + column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column + _ => panic!("don't support grouping on non-encoded values or timestamps"), }) .collect::>(); @@ -331,7 +330,10 @@ impl Segment { let mut aggregate_itrs = aggregate_column_decoded_values .iter() .map(|(col_name, values)| match values { - Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), + Some(values) => ( + col_name.as_str(), + Some(column::NullVectorIterator::new(values)), + ), None => (col_name.as_str(), None), }) .collect::>(); @@ -339,7 +341,7 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&'a String, &'a AggregateType, Option>)>, + Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)>, > = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option>)> = @@ -355,29 +357,50 @@ impl Segment { group_itrs.iter_mut().enumerate().for_each(|(i, itr)| { if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - group_key[i] = itr.next().unwrap() / window * window; + if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + group_key[i] = v / window * window; + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); + } + } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = + itr.next() + { + group_key[i] = v as i64 } else { - group_key[i] = *itr.next().unwrap(); + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); } }); // re-use aggregate_row vector. for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { match itr { - Some(itr) => aggregate_row[i] = (col_name, itr.next()), + Some(itr) => { + // This is clunky. 
We don't need to check for the sentinel None value + // to indicate the end of the iterator because we use the guard in + // the while loop to do so. + aggregate_row[i] = (col_name, itr.next().unwrap_or(None)); + } None => aggregate_row[i] = (col_name, None), } } // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<( - &'a String, - &'a AggregateType, - Option>, - )> = Vec::with_capacity(aggregates.len()); + let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)> = + Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { - agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option + agg_results.push(( + col_name, + agg_type, + match agg_type { + AggregateType::Count => column::Aggregate::Count(0), + AggregateType::Sum => column::Aggregate::Sum(None), + }, + )); } hash_table.insert(group_key.clone(), agg_results); } @@ -395,28 +418,39 @@ impl Segment { continue; } - // TODO(edd): remove unwrap - it should work because we are - // tracking iteration count in loop. - let row_value = row_value.as_ref().unwrap(); - match cum_agg_value { - Some(agg) => match agg { - column::Aggregate::Count(cum_count) => { - *cum_count += 1; - } - column::Aggregate::Sum(cum_sum) => { - *cum_sum += row_value; - } - }, - None => { - *cum_agg_value = match agg_type { - AggregateType::Count => Some(column::Aggregate::Count(0)), - AggregateType::Sum => { - Some(column::Aggregate::Sum(row_value.clone())) + column::Aggregate::Count(x) => { + *x += 1; + } + column::Aggregate::Sum(v) => { + if let Some(row_value) = row_value { + match v { + Some(x) => { + *x += row_value; + } + None => *v = Some(row_value.clone()), } } } } + // match cum_agg_value { + // Some(agg) => match agg { + // column::Aggregate::Count(_) => { + // *cum_agg_value = Some(agg + column::Aggregate::Count(Some(1))); + // } + // column::Aggregate::Sum(cum_sum) => { + // *cum_sum += row_value; + // } + // }, + // None => { + // *cum_agg_value = match agg_type { + // AggregateType::Count => Some(column::Aggregate::Count(Some(0))), + // AggregateType::Sum => { + // Some(column::Aggregate::Sum(row_value.clone())) + // } + // } + // } + // } } } processed_rows += 1; @@ -757,10 +791,6 @@ impl Segment { } // Returns the count aggregate for a given column name. - // - // Since we guarantee to provide row ids for the segment, and all columns - // have the same number of logical rows, the count is just the number of - // requested logical rows. 
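
// Aside: the comment being deleted above still describes the fast path in the
// function below: all columns in a segment share the same number of logical
// rows, so a count aggregate over a filtered row set is just the cardinality
// of the row-id bitmap, with no column scan. Sketch against the croaring
// bitmap API this crate already uses:

    fn count_rows(row_ids: &croaring::Bitmap) -> u64 {
        row_ids.cardinality()
    }
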
pub fn count_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if self.column(name).is_some() { return Some(row_ids.cardinality() as u64); @@ -899,8 +929,8 @@ impl Segment { aggs.push(( (col_name.to_string(), agg.clone()), column::Aggregate::Sum( - self.sum_column(col_name, &mut filtered_row_ids).unwrap(), - ), // assuming no non-null group keys + self.sum_column(col_name, &mut filtered_row_ids), + ), )); } AggregateType::Count => { @@ -908,7 +938,7 @@ impl Segment { (col_name.to_string(), agg.clone()), column::Aggregate::Count( self.count_column(col_name, &mut filtered_row_ids).unwrap(), - ), // assuming no non-null group keys + ), )); } } @@ -1392,7 +1422,7 @@ impl<'a> Segments<'a> { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwrap - let min_ts = ts_col.column_range().0; + let min_ts = ts_col.column_range().unwrap().0; assert_eq!(min_ts, segment.meta.time_range.0); let min_ts_id = ts_col.row_id_eq_value(min_ts).unwrap(); @@ -1424,7 +1454,7 @@ impl<'a> Segments<'a> { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwrap - let max_ts = ts_col.column_range().1; + let max_ts = ts_col.column_range().unwrap().1; assert_eq!(max_ts, segment.meta.time_range.1); let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index fb592b87ad..55ae124ea6 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -39,7 +39,7 @@ pub enum Error { /// comparison scan performed on them to ensure they're not already sorted. const SORTED_CHECK_SIZE: usize = 1000; -/// Sort a slice of `Packers` based on the provided column indexes. +/// Sort a slice of `Vector` based on the provided column indexes. /// /// All chosen columns will be sorted in ascending order; the sort is *not* /// stable. @@ -77,9 +77,6 @@ pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(), log::debug!("columns already sorted"); return Ok(()); } - // if vectors_sorted_asc(vectors, n, sort_by) { - // return Ok(()); - // } } let now = std::time::Instant::now(); quicksort_by(vectors, 0..n - 1, sort_by); @@ -136,7 +133,7 @@ fn partition(vectors: &mut [column::Vector<'_>], range: &Range, sort_by: fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering { for &idx in sort_by { match &vectors[idx] { - column::Vector::String(p) => { + column::Vector::NullString(p) => { let cmp = p.get(a).cmp(&p.get(b)); if cmp != Ordering::Equal { return cmp; @@ -150,7 +147,7 @@ fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> } // if cmp equal then try next vector. 
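
// Aside: the comparator walks the sort-key columns in order and only falls
// through to the next column on a tie, i.e. an ordinary lexicographic
// comparison. Reduced to plain integer columns:

    use std::cmp::Ordering;

    fn cmp_rows(cols: &[Vec<i64>], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
        for &idx in sort_by {
            match cols[idx][a].cmp(&cols[idx][b]) {
                Ordering::Equal => continue, // tie: compare the next sort column
                other => return other,
            }
        }
        Ordering::Equal // rows are equal on every sort column
    }
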
} - _ => continue, // don't compare on non-string / timestamp cols + _ => unimplemented!("todo!"), // don't compare on non-string / timestamp cols } } Ordering::Equal @@ -161,7 +158,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi 'row_wise: for i in 1..len { for &idx in sort_by { match &vectors[idx] { - column::Vector::String(vec) => { + column::Vector::NullString(vec) => { if vec[i - 1] < vec[i] { continue 'row_wise; } else if vec[i - 1] == vec[i] { @@ -183,7 +180,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi return false; } } - _ => continue, // don't compare on non-string / timestamp cols + _ => unimplemented!("todo!"), // don't compare on non-string / timestamp cols } } } From 4f12e151d6e44f8fbe217c2f9bb0d8c4b01a754d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 17 Sep 2020 09:50:17 +0100 Subject: [PATCH 59/73] refactor: running --- delorean_mem_qe/src/bin/main.rs | 42 +++++--- delorean_mem_qe/src/column.rs | 183 ++++++++++++++++++++++++++++++-- delorean_mem_qe/src/encoding.rs | 15 +++ delorean_mem_qe/src/segment.rs | 95 +++++++++++++---- delorean_mem_qe/src/sorter.rs | 47 ++++---- 5 files changed, 322 insertions(+), 60 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 90acdd167f..40e446dd02 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 364 { - i += 1; - continue; - } + // if i < 364 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -134,7 +134,7 @@ fn build_store( let mut segment = Segment::new(rb.num_rows(), schema); convert_record_batch(rb, &mut segment)?; - println!("{}", &segment); + // println!("{}", &segment); store.add_segment(segment); } Ok(None) => { @@ -166,30 +166,46 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er .as_any() .downcast_ref::() .unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); + + // TODO(edd): figure out how to get ownership here without + // cloning + // let arr: array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { if column.null_count() > 0 { - panic!("null times"); + panic!("null integers not expected in testing"); } let arr = column.as_any().downcast_ref::().unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); + + // TODO(edd): figure out how to get ownership here without + // cloning + // let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => { if column.null_count() > 0 { - panic!("null times"); + panic!("null timestamps not expected in testing"); } let arr = column .as_any() .downcast_ref::() .unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); + + // TODO(edd): figure out how to get ownership here without + // cloning + // let arr: 
array::TimestampMicrosecondArray = + // arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { let arr = column @@ -469,9 +485,9 @@ fn time_group_single_with_pred(store: &Store) { fn time_group_by_multi_agg_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, - // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - // GroupingStrategy::SortGroupConcurrent, + GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { @@ -520,7 +536,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 1; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 3b27743c2b..c76ce588e3 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1251,17 +1251,36 @@ impl AggregatableByRange for &Column { } } -// impl From<&[f64]> for Column { -// fn from(values: &[f64]) -> Self { -// Self::Float(Float::from(values)) -// } -// } +use arrow::array::{Float64Array, Int64Array, TimestampMicrosecondArray}; +impl From for Column { + fn from(arr: arrow::array::Float64Array) -> Self { + Self::Float(NumericColumn::from(arr)) + } +} -// impl From<&[i64]> for Column { -// fn from(values: &[i64]) -> Self { -// Self::Integer(Integer::from(values)) -// } -// } +impl From for Column { + fn from(arr: TimestampMicrosecondArray) -> Self { + Self::Integer(NumericColumn::from(arr)) + } +} + +impl From for Column { + fn from(arr: Int64Array) -> Self { + Self::Integer(NumericColumn::from(arr)) + } +} + +impl From<&[f64]> for Column { + fn from(values: &[f64]) -> Self { + Self::Float(NumericColumn::from(values)) + } +} + +impl From<&[i64]> for Column { + fn from(values: &[i64]) -> Self { + Self::Integer(NumericColumn::from(values)) + } +} #[derive(Debug, Default)] pub struct String { @@ -1583,6 +1602,150 @@ where } } +use arrow::array::Array; +impl From for NumericColumn { + fn from(arr: arrow::array::Float64Array) -> Self { + let len = arr.len(); + let mut range: Option<(f64, f64)> = None; + + // calculate min/max for meta data + // TODO(edd): can use compute kernels for this. + for i in 0..arr.len() { + if arr.is_null(i) { + continue; + } + + let v = arr.value(i); + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainArrow::new(arr)), + } + } +} + +impl From for NumericColumn { + fn from(arr: arrow::array::Int64Array) -> Self { + let len = arr.len(); + let mut range: Option<(i64, i64)> = None; + + // calculate min/max for meta data + // TODO(edd): can use compute kernels for this. 
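
// Aside: the min/max scans in these From impls match on `Some(mut range)`,
// which copies the (Copy) tuple out of the Option, so the updates are silently
// lost; PATCH 60 below fixes this with `ref mut`. The intended scan, sketched
// over a plain nullable slice:

    fn min_max(values: &[Option<i64>]) -> Option<(i64, i64)> {
        let mut range: Option<(i64, i64)> = None;
        for v in values.iter().flatten() {
            match &mut range {
                Some(r) => {
                    r.0 = r.0.min(*v);
                    r.1 = r.1.max(*v);
                }
                None => range = Some((*v, *v)),
            }
        }
        range
    }

    // min_max(&[None, Some(3), Some(-1)]) == Some((-1, 3))
    // min_max(&[None, None])              == None
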
+ for i in 0..arr.len() { + if arr.is_null(i) { + continue; + } + + let v = arr.value(i); + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainArrow::new(arr)), + } + } +} + +impl From for NumericColumn { + fn from(arr: arrow::array::TimestampMicrosecondArray) -> Self { + let len = arr.len(); + let mut range: Option<(i64, i64)> = None; + + // calculate min/max for meta data + // TODO(edd): can use compute kernels for this. + for i in 0..arr.len() { + if arr.is_null(i) { + continue; + } + + let v = arr.value(i); + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainArrow::new(arr)), + } + } +} + +impl From<&[f64]> for NumericColumn { + fn from(values: &[f64]) -> Self { + let len = values.len(); + let mut range: Option<(f64, f64)> = None; + + // calculate min/max for meta data + for &v in values { + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainFixed::from(values)), + } + } +} + +impl From<&[i64]> for NumericColumn { + fn from(values: &[i64]) -> Self { + let len = values.len(); + let mut range: Option<(i64, i64)> = None; + + // calculate min/max for meta data + for &v in values { + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainFixed::from(values)), + } + } +} + pub mod metadata { use std::mem::size_of; diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 48f00182ae..e27df1ae58 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -48,6 +48,21 @@ where arr: PrimitiveArray, } +impl PlainArrow +where + T: ArrowNumericType + std::fmt::Debug, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, +{ + pub fn new(arr: PrimitiveArray) -> Self { + Self { arr } + } +} + impl NumericEncoding for PlainArrow where T: ArrowNumericType + std::fmt::Debug, diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 07bc024145..b8626ea164 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -566,15 +566,25 @@ impl Segment { } log::debug!("time checking sort {:?}", now.elapsed()); + // let group_itrs = all_columns + // .iter() + // .take(group_columns.len()) // only use grouping columns + // .map(|vector| { + // if let column::Vector::Integer(v) = vector { + // v.iter() + // } else { + // panic!("don't support grouping on non-encoded values"); + // } + // }) + // .collect::>(); + let group_itrs = all_columns .iter() - .take(group_columns.len()) // only use grouping columns - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } + .take(group_columns.len()) + .map(|vector| match vector { + column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Integer(_) => 
column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column + _ => panic!("don't support grouping on non-encoded values or timestamps"), }) .collect::>(); @@ -653,17 +663,26 @@ impl Segment { } } - let group_itrs = group_column_encoded_values + let mut group_itrs = group_column_encoded_values .iter() - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } + .map(|vector| match vector { + column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column + _ => panic!("don't support grouping on non-encoded values or timestamps"), }) .collect::>(); + // let group_itrs = group_column_encoded_values + // .iter() + // .map(|vector| { + // if let column::Vector::Integer(v) = vector { + // v.iter() + // } else { + // panic!("don't support grouping on non-encoded values"); + // } + // }) + // .collect::>(); + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); for (column_name, agg_type) in aggregates { aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); @@ -676,7 +695,7 @@ impl Segment { // available and appropriately sorted this method will build a result set of // aggregates in a streaming way. pub fn stream_grouped_aggregates<'a>( - mut group_itrs: Vec>, + mut group_itrs: Vec>, aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, total_rows: usize, window: i64, @@ -688,11 +707,30 @@ impl Segment { .iter_mut() .enumerate() .map(|(i, itr)| { + // if i == group_itrs_len - 1 && window > 0 { + // // time column - apply window function + // return itr.next().unwrap() / window * window; + // } + // *itr.next().unwrap() + if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - return itr.next().unwrap() / window * window; + if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + v / window * window + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); + } + } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = + itr.next() + { + v as i64 + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); } - *itr.next().unwrap() }) .collect::>(); @@ -713,12 +751,31 @@ impl Segment { .zip(group_itrs.iter_mut()) .enumerate() { + // let next_v = if i == group_itrs_len - 1 && window > 0 { + // // time column - apply window function + // itr.next().unwrap() / window * window + // } else { + // *itr.next().unwrap() + // }; let next_v = if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - itr.next().unwrap() / window * window + if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + v / window * window + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); + } + } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = + itr.next() + { + v as i64 } else { - *itr.next().unwrap() + unreachable!( + "something broken with grouping! 
Either processed None or wrong type" + ); }; + if curr_v != &next_v { group_key_changed = true; } diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index 55ae124ea6..7dd7d6ced3 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -133,7 +133,7 @@ fn partition(vectors: &mut [column::Vector<'_>], range: &Range, sort_by: fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering { for &idx in sort_by { match &vectors[idx] { - column::Vector::NullString(p) => { + column::Vector::Unsigned32(p) => { let cmp = p.get(a).cmp(&p.get(b)); if cmp != Ordering::Equal { return cmp; @@ -158,27 +158,38 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi 'row_wise: for i in 1..len { for &idx in sort_by { match &vectors[idx] { - column::Vector::NullString(vec) => { - if vec[i - 1] < vec[i] { - continue 'row_wise; - } else if vec[i - 1] == vec[i] { - // try next column - continue; - } else { - // value is > so - return false; + column::Vector::Unsigned32(vec) => { + match vec[i - 1].cmp(&vec[i]) { + Ordering::Less => continue 'row_wise, + Ordering::Equal => continue, + Ordering::Greater => return false, } + // if vec[i - 1] < vec[i] { + // continue 'row_wise; + // } else if vec[i - 1] == vec[i] { + // // try next column + // continue; + // } else { + // // value is > so + // return false; + // } } column::Vector::Integer(vec) => { - if vec[i - 1] < vec[i] { - continue 'row_wise; - } else if vec[i - 1] == vec[i] { - // try next column - continue; - } else { - // value is > so - return false; + match vec[i - 1].cmp(&vec[i]) { + Ordering::Less => continue 'row_wise, + Ordering::Equal => continue, + Ordering::Greater => return false, } + + // if vec[i - 1] < vec[i] { + // continue 'row_wise; + // } else if vec[i - 1] == vec[i] { + // // try next column + // continue; + // } else { + // // value is > so + // return false; + // } } _ => unimplemented!("todo!"), // don't compare on non-string / timestamp cols } From 751fa013e7dbd42844c402467da092e7c7f59402 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 17 Sep 2020 21:52:50 +0100 Subject: [PATCH 60/73] fix: fix some bugs --- delorean_mem_qe/src/bin/main.rs | 30 ++++++++--------- delorean_mem_qe/src/column.rs | 59 +++++++++++++++++++++++++-------- delorean_mem_qe/src/encoding.rs | 27 ++++++++++++--- delorean_mem_qe/src/segment.rs | 23 +++++++------ 4 files changed, 95 insertions(+), 44 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 40e446dd02..2061b229da 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -63,14 +63,14 @@ fn main() { ); let store = Arc::new(store); - time_select_with_pred(&store); - time_datafusion_select_with_pred(store.clone()); - time_first_host(&store); - time_sum_range(&store); - time_count_range(&store); - time_group_single_with_pred(&store); - time_group_by_multi_agg_count(&store); - time_group_by_multi_agg_sorted_count(&store); + // time_select_with_pred(&store); + // time_datafusion_select_with_pred(store.clone()); + // time_first_host(&store); + // time_sum_range(&store); + // time_count_range(&store); + // time_group_single_with_pred(&store); + // time_group_by_multi_agg_count(&store); + // time_group_by_multi_agg_sorted_count(&store); time_window_agg_count(&store); // time_group_by_different_columns(&store); } @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), 
Ok(Some(rb)) => { - // if i < 364 { - // i += 1; - // continue; - // } + if i < 364 { + i += 1; + continue; + } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -134,7 +134,7 @@ fn build_store( let mut segment = Segment::new(rb.num_rows(), schema); convert_record_batch(rb, &mut segment)?; - // println!("{}", &segment); + log::debug!("{}", &segment); store.add_segment(segment); } Ok(None) => { @@ -499,7 +499,7 @@ fn time_group_by_multi_agg_count(store: &Store) { let now = std::time::Instant::now(); let groups = segments.read_group_eq( - (1589000000000001, 1590044410000000), + (1589000000000001, 1590044410000001), &[], vec!["status".to_string(), "method".to_string()], vec![("counter".to_string(), AggregateType::Count)], @@ -575,7 +575,7 @@ fn time_window_agg_count(store: &Store) { ]; for strat in &strats { - let repeat = 1; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index c76ce588e3..61081d2de2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -393,20 +393,41 @@ impl<'a> Vector<'a> { fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { match self { Self::NullString(vec) => { - let count = vec.iter().filter(|x| x.is_some()).count(); + let mut count = 0; + for v in &vec[from_row_id..to_row_id] { + if v.is_some() { + count += 1; + } + } count as u64 } Self::NullFloat(vec) => { - let count = vec.iter().filter(|x| x.is_some()).count(); + let mut count = 0; + for v in &vec[from_row_id..to_row_id] { + if v.is_some() { + count += 1; + } + } count as u64 } Self::NullInteger(vec) => { - let count = vec.iter().filter(|x| x.is_some()).count(); + let mut count = 0; + for v in &vec[from_row_id..to_row_id] { + if v.is_some() { + count += 1; + } + } count as u64 } - Self::Float(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values - Self::Integer(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values - Self::Unsigned32(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values + Self::Float(_) => { + (to_row_id - from_row_id) as u64 // fast - no possible NULL values + } + Self::Integer(_) => { + (to_row_id - from_row_id) as u64 // fast - no possible NULL values + } + Self::Unsigned32(_) => { + (to_row_id - from_row_id) as u64 // fast - no possible NULL values + } } } @@ -705,6 +726,13 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids. + // + // FIXME(edd): we need to provide an API on an encoding to return raw_values + // so that we can return non-null vectors when we know the underlying encoding + // doesn't contain any null values. Right now we return nullable vectors, w + // which take up more memory and mean we can't do fast counts (since we need + // to check each value is non-null). 
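
// Aside: the FIXME above is the trade-off the Vector enum split is chasing: a
// dense Vec<f64> sums and counts in a tight, vectorisable loop, while
// Vec<Option<f64>> pays a branch per element. Sketch of the two sum paths:

    fn sum_dense(values: &[f64], from: usize, to: usize) -> f64 {
        values[from..to].iter().sum()
    }

    fn sum_nullable(values: &[Option<f64>], from: usize, to: usize) -> Option<f64> {
        let mut sum = None;
        for v in values[from..to].iter().flatten() {
            *sum.get_or_insert(0.0) += v;
        }
        sum
    }
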
+ // pub fn values(&self, row_ids: &[usize]) -> Vector<'_> { match self { Column::String(c) => { @@ -1227,13 +1255,13 @@ impl std::fmt::Display for Column { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self { Column::String(c) => { - write!(f, "{}", c)?; + write!(f, "[String Column]: {}", c)?; } Column::Float(c) => { - write!(f, "{}", c)?; + write!(f, "[Float Column]:{}", c)?; } Column::Integer(c) => { - write!(f, "{}", c)?; + write!(f, "[Integer Column]: {}", c)?; } } Ok(()) @@ -1617,7 +1645,7 @@ impl From for NumericColumn { let v = arr.value(i); match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1648,7 +1676,7 @@ impl From for NumericColumn { let v = arr.value(i); match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1679,7 +1707,7 @@ impl From for NumericColumn { let v = arr.value(i); match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1704,7 +1732,10 @@ impl From<&[f64]> for NumericColumn { // calculate min/max for meta data for &v in values { match range { - Some(mut range) => { + // wow this ref totally confused me for a while. Without it + // the code will compile fine but the range option will never + // reflect changes because the tuple range will be a copy. + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1729,7 +1760,7 @@ impl From<&[i64]> for NumericColumn { // calculate min/max for meta data for &v in values { match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e27df1ae58..ae9085e531 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -217,7 +217,13 @@ where + std::ops::Add, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[PlainArrow] size: {}", self.size()) + write!( + f, + "[PlainArrow] rows: {:?}, nulls: {:?}, size: {}", + self.arr.len(), + self.arr.null_count(), + self.size() + ) } } @@ -245,7 +251,12 @@ where + std::ops::AddAssign, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[PlainFixed] size: {}", self.size(),) + write!( + f, + "[PlainFixed] rows: {:?}, size: {}", + self.values.len(), + self.size() + ) } } @@ -481,6 +492,7 @@ pub struct DictionaryRLE { // of times the entry repeats. run_lengths: Vec<(usize, u64)>, + nulls: u64, total: u64, } @@ -492,6 +504,7 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), + nulls: 0, total: 0, } } @@ -503,6 +516,7 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), + nulls: 0, total: 0, }; @@ -514,7 +528,7 @@ impl DictionaryRLE { .index_row_ids .insert(next_idx as u32, croaring::Bitmap::create()); - _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta + _self.run_lengths.push((next_idx, 0)); // could this cause a bug? 
} _self } @@ -568,6 +582,9 @@ impl DictionaryRLE { } } self.total += additional; + if v.is_none() { + self.nulls += additional; + } } // row_ids returns an iterator over the set of row ids matching the provided @@ -817,7 +834,9 @@ impl std::fmt::Display for DictionaryRLE { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "[DictionaryRLE] size: {}, dict entries: {}, runs: {} ", + "[DictionaryRLE] rows: {:?} nulls: {:?}, size: {}, dict entries: {}, runs: {} ", + self.total, + self.nulls, self.size(), self.index_entry.len(), self.run_lengths.len() diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index b8626ea164..99cf527d0b 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -229,8 +229,7 @@ impl Segment { group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option>)>> - { + ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate<'a>)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -497,6 +496,7 @@ impl Segment { .iter() .map(|v| *v as usize) .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); // materialise all encoded values for the matching rows in the columns // we are grouping on and store each group as an iterator. @@ -557,15 +557,13 @@ impl Segment { } let now = std::time::Instant::now(); - if self.group_key_sorted(group_columns) { - panic!("This shouldn't be called!!!"); - } else { - // now sort on the first grouping columns. Right now the order doesn't matter... - let group_col_sort_order = &(0..group_columns.len()).collect::>(); - super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - } + assert!(!self.group_key_sorted(group_columns)); // should always need a sort if in this method log::debug!("time checking sort {:?}", now.elapsed()); + // now sort on the first grouping columns. Right now the order doesn't matter... + let group_col_sort_order = &(0..group_columns.len()).collect::>(); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + // let group_itrs = all_columns // .iter() // .take(group_columns.len()) // only use grouping columns @@ -582,7 +580,9 @@ impl Segment { .iter() .take(group_columns.len()) .map(|vector| match vector { - column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Unsigned32(_) => { + column::VectorIterator::new(vector) // encoded tag columns + } column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column _ => panic!("don't support grouping on non-encoded values or timestamps"), }) @@ -641,6 +641,7 @@ impl Segment { .iter() .map(|v| *v as usize) .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); // materialise all encoded values for the matching rows in the columns // we are grouping on and store each group as an iterator. 
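Note: the hunks above and below converge on the streaming variant of group-by:
once rows arrive sorted by group key, a running aggregate can be emitted each
time the key changes, with no hash table, and the time column can be bucketed
on the fly with the same `v / window * window` truncation used throughout. A
self-contained sketch of a windowed streaming count:

    fn stream_window_counts(sorted_ts: &[i64], window: i64) -> Vec<(i64, u64)> {
        let mut out: Vec<(i64, u64)> = Vec::new();
        for &ts in sorted_ts {
            let key = ts / window * window; // truncate to the window start
            if let Some((k, count)) = out.last_mut() {
                if *k == key {
                    *count += 1; // still in the same group/window
                    continue;
                }
            }
            out.push((key, 1)); // key changed (or first row): start a new group
        }
        out
    }

    // stream_window_counts(&[10, 20, 65, 70, 130], 60) == [(0, 2), (60, 2), (120, 1)]
    // (integer division truncates toward zero, so negative timestamps would
    // need separate handling)
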
@@ -663,7 +664,7 @@ impl Segment { } } - let mut group_itrs = group_column_encoded_values + let group_itrs = group_column_encoded_values .iter() .map(|vector| match vector { column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns From f0b371cd6e39ab7304f2130533a8a3e75fb2ed71 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 18 Sep 2020 10:33:01 +0100 Subject: [PATCH 61/73] feat: arrow buffers working --- delorean_mem_qe/src/bin/main.rs | 76 ++++++++++++++++----------------- delorean_mem_qe/src/encoding.rs | 56 ++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 42 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 2061b229da..0f7cd65d66 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -63,15 +63,15 @@ fn main() { ); let store = Arc::new(store); - // time_select_with_pred(&store); - // time_datafusion_select_with_pred(store.clone()); - // time_first_host(&store); - // time_sum_range(&store); - // time_count_range(&store); - // time_group_single_with_pred(&store); - // time_group_by_multi_agg_count(&store); - // time_group_by_multi_agg_sorted_count(&store); - time_window_agg_count(&store); + time_select_with_pred(&store); + time_datafusion_select_with_pred(store.clone()); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_sorted_count(&store); + // time_window_agg_count(&store); // time_group_by_different_columns(&store); } @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 364 { - i += 1; - continue; - } + // if i < 364 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -162,50 +162,50 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er if column.null_count() > 0 { panic!("null floats"); } - let arr = column - .as_any() - .downcast_ref::() - .unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); - segment.add_column(rb.schema().field(i).name(), column); + // let arr = column + // .as_any() + // .downcast_ref::() + // .unwrap(); + // let column = Column::from(arr.value_slice(0, rb.num_rows())); + // segment.add_column(rb.schema().field(i).name(), column); // TODO(edd): figure out how to get ownership here without // cloning - // let arr: array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); - // let column = Column::from(arr); - // segment.add_column(rb.schema().field(i).name(), column); + let arr: array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); + let column = Column::from(arr); + segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { if column.null_count() > 0 { panic!("null integers not expected in testing"); } - let arr = column.as_any().downcast_ref::().unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); - segment.add_column(rb.schema().field(i).name(), column); + // let arr = column.as_any().downcast_ref::().unwrap(); + // let column = Column::from(arr.value_slice(0, rb.num_rows())); + // segment.add_column(rb.schema().field(i).name(), column); // TODO(edd): figure out how to get ownership here without // cloning - // let arr: array::Int64Array = 
arrow::array::PrimitiveArray::from(column.data());
-                    // let column = Column::from(arr);
-                    // segment.add_column(rb.schema().field(i).name(), column);
+                    let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data());
+                    let column = Column::from(arr);
+                    segment.add_column(rb.schema().field(i).name(), column);
                }
                datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => {
                    if column.null_count() > 0 {
                        panic!("null timestamps not expected in testing");
                    }
-                    let arr = column
-                        .as_any()
-                        .downcast_ref::<array::TimestampMicrosecondArray>()
-                        .unwrap();
-                    let column = Column::from(arr.value_slice(0, rb.num_rows()));
-                    segment.add_column(rb.schema().field(i).name(), column);
+                    // let arr = column
+                    //     .as_any()
+                    //     .downcast_ref::<array::TimestampMicrosecondArray>()
+                    //     .unwrap();
+                    // let column = Column::from(arr.value_slice(0, rb.num_rows()));
+                    // segment.add_column(rb.schema().field(i).name(), column);

                    // TODO(edd): figure out how to get ownership here without
                    // cloning
-                    // let arr: array::TimestampMicrosecondArray =
-                    //     arrow::array::PrimitiveArray::from(column.data());
-                    // let column = Column::from(arr);
-                    // segment.add_column(rb.schema().field(i).name(), column);
+                    let arr: array::TimestampMicrosecondArray =
+                        arrow::array::PrimitiveArray::from(column.data());
+                    let column = Column::from(arr);
+                    segment.add_column(rb.schema().field(i).name(), column);
                }
                datatypes::DataType::Utf8 => {
                    let arr = column

diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs
index ae9085e531..e99f70cbfa 100644
--- a/delorean_mem_qe/src/encoding.rs
+++ b/delorean_mem_qe/src/encoding.rs
@@ -23,7 +23,10 @@ pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug {
     fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64;
     fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64;

+    /// Returns the index of the first value equal to `v`.
     fn row_id_eq_value(&self, v: Self::Item) -> Option<usize>;
+
+    /// Returns the index of the first value greater than or equal to `v`.
     fn row_id_ge_value(&self, v: Self::Item) -> Option<usize>;

     fn row_ids_single_cmp_roaring(
@@ -186,11 +189,25 @@ where
     }

     fn row_id_eq_value(&self, v: Self::Item) -> Option<usize> {
-        todo!()
+        for i in 0..self.arr.len() {
+            if self.arr.is_null(i) {
+                continue;
+            } else if self.arr.value(i) == v {
+                return Some(i);
+            }
+        }
+        None
     }

     fn row_id_ge_value(&self, v: Self::Item) -> Option<usize> {
-        todo!()
+        for i in 0..self.arr.len() {
+            if self.arr.is_null(i) {
+                continue;
+            } else if self.arr.value(i) >= v {
+                return Some(i);
+            }
+        }
+        None
     }

     fn row_ids_single_cmp_roaring(
@@ -202,7 +219,37 @@ where
     }

     fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap {
-        todo!()
+        let mut bm = croaring::Bitmap::create();
+
+        let mut found = false;
+        let mut count = 0;
+        for i in 0..self.arr.len() {
+            let next = &self.arr.value(i);
+            if (self.arr.is_null(i) || next < from || next >= to) && found {
+                let (min, max) = (i as u64 - count as u64, i as u64);
+                bm.add_range(min..max);
+                found = false;
+                count = 0;
+                continue;
+            } else if self.arr.is_null(i) || next < from || next >= to {
+                continue;
+            }
+
+            if !found {
+                found = true;
+            }
+            count += 1;
+        }
+
+        // add any remaining range.
+        if found {
+            let (min, max) = (
+                (self.arr.len()) as u64 - count as u64,
+                (self.arr.len()) as u64,
+            );
+            bm.add_range(min..max);
+        }
+        bm
     }
 }

@@ -528,7 +576,7 @@ impl DictionaryRLE {
                 .index_row_ids
                 .insert(next_idx as u32, croaring::Bitmap::create());

-            _self.run_lengths.push((next_idx, 0)); // could this cause a bug?
+            _self.run_lengths.push((next_idx, 0)); // could this cause a bug?
        }
        _self
    }

From e3d805e997a662c30336f8186ee7be2a72871f78 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Tue, 22 Sep 2020 11:37:09 +0100
Subject: [PATCH 62/73] feat: tag_keys implementation

---
 delorean_mem_qe/src/adapter.rs  |   2 +-
 delorean_mem_qe/src/bin/main.rs | 112 ++++++++++++++--------
 delorean_mem_qe/src/column.rs   |  18 +++-
 delorean_mem_qe/src/encoding.rs | 102 +++++++++++++++++++-
 delorean_mem_qe/src/segment.rs  | 165 ++++++++++++++++++++++++++------
 5 files changed, 323 insertions(+), 76 deletions(-)

diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs
index a0fdab6af6..c2537fa53c 100644
--- a/delorean_mem_qe/src/adapter.rs
+++ b/delorean_mem_qe/src/adapter.rs
@@ -309,7 +309,7 @@ impl Partition for SegmentPartition {
        // Here
        let _columns = segments.read_filter_eq(
            self.time_range,
-            &[(col_name, Some(&scalar))],
+            &[(col_name, Some(scalar))],
            vec![
                "env".to_string(),
                "method".to_string(),

diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs
index 0f7cd65d66..e6251c0dd4 100644
--- a/delorean_mem_qe/src/bin/main.rs
+++ b/delorean_mem_qe/src/bin/main.rs
@@ -13,7 +13,7 @@ use arrow::{array, array::Array, datatypes, ipc};

 use delorean_mem_qe::column;
 use delorean_mem_qe::column::{AggregateType, Column};
-use delorean_mem_qe::segment::{GroupingStrategy, Schema, Segment};
+use delorean_mem_qe::segment::{ColumnType, GroupingStrategy, Schema, Segment};
 use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store};
 use parquet::arrow::arrow_reader::ArrowReader;

@@ -63,15 +63,16 @@ fn main() {
    );
    let store = Arc::new(store);

-    time_select_with_pred(&store);
-    time_datafusion_select_with_pred(store.clone());
-    time_first_host(&store);
-    time_sum_range(&store);
-    time_count_range(&store);
-    time_group_single_with_pred(&store);
-    time_group_by_multi_agg_count(&store);
-    time_group_by_multi_agg_sorted_count(&store);
+    // time_select_with_pred(&store);
+    // time_datafusion_select_with_pred(store.clone());
+    // time_first_host(&store);
+    // time_sum_range(&store);
+    // time_count_range(&store);
+    // time_group_single_with_pred(&store);
+    // time_group_by_multi_agg_count(&store);
+    // time_group_by_multi_agg_sorted_count(&store);
    // time_window_agg_count(&store);
+    time_tag_keys_with_pred(&store);
    // time_group_by_different_columns(&store);
 }

@@ -121,10 +122,10 @@ fn build_store(
        match rb {
            Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e),
            Ok(Some(rb)) => {
-                // if i < 364 {
-                //     i += 1;
-                //     continue;
-                // }
+                if i < 360 {
+                    i += 1;
+                    continue;
+                }
                let schema = Schema::with_sort_order(
                    rb.schema(),
                    sort_order.iter().map(|s| s.to_string()).collect(),
@@ -162,50 +163,50 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er
                if column.null_count() > 0 {
                    panic!("null floats");
                }
-                // let arr = column
-                //     .as_any()
-                //     .downcast_ref::<array::Float64Array>()
-                //     .unwrap();
-                // let column = Column::from(arr.value_slice(0, rb.num_rows()));
-                // segment.add_column(rb.schema().field(i).name(), column);
+                let arr = column
+                    .as_any()
+                    .downcast_ref::<array::Float64Array>()
+                    .unwrap();
+                let column = Column::from(arr.value_slice(0, rb.num_rows()));
+                segment.add_column(rb.schema().field(i).name(), ColumnType::Field(column));

                // TODO(edd): figure out how to get ownership here without
                // cloning
                // let arr:
array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { if column.null_count() > 0 { panic!("null integers not expected in testing"); } - // let arr = column.as_any().downcast_ref::().unwrap(); - // let column = Column::from(arr.value_slice(0, rb.num_rows())); - // segment.add_column(rb.schema().field(i).name(), column); + let arr = column.as_any().downcast_ref::().unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), ColumnType::Time(column)); // TODO(edd): figure out how to get ownership here without // cloning - let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data()); - let column = Column::from(arr); - segment.add_column(rb.schema().field(i).name(), column); + // let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => { if column.null_count() > 0 { panic!("null timestamps not expected in testing"); } - // let arr = column - // .as_any() - // .downcast_ref::() - // .unwrap(); - // let column = Column::from(arr.value_slice(0, rb.num_rows())); - // segment.add_column(rb.schema().field(i).name(), column); + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), ColumnType::Time(column)); // TODO(edd): figure out how to get ownership here without // cloning - let arr: array::TimestampMicrosecondArray = - arrow::array::PrimitiveArray::from(column.data()); - let column = Column::from(arr); - segment.add_column(rb.schema().field(i).name(), column); + // let arr: array::TimestampMicrosecondArray = + // arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { let arr = column @@ -266,7 +267,10 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er None => c.add_additional(None, count), } - segment.add_column(rb.schema().field(i).name(), Column::String(c)); + segment.add_column( + rb.schema().field(i).name(), + ColumnType::Tag(Column::String(c)), + ); } datatypes::DataType::Boolean => { panic!("unsupported"); @@ -383,7 +387,7 @@ fn time_select_with_pred(store: &Store) { let columns = segments.read_filter_eq( (1590036110000000, 1590040770000000), - &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], + &[("env", Some(column::Scalar::String("prod01-eu-central-1")))], vec![ "env".to_string(), "method".to_string(), @@ -605,6 +609,34 @@ fn time_window_agg_count(store: &Store) { } } +// +// SHOW TAG KEYS WHERE time >= x and time < y AND "env" = 'prod01-eu-central-1' +fn time_tag_keys_with_pred(store: &Store) { + let repeat = 1000000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let columns = segments.tag_keys( + (1588834080000000, 1590044410000000), + &[("env", "prod01-eu-central-1"), ("method", "GET")], + ); + + total_time += now.elapsed(); + track += columns.len(); + // println!("{:?}", columns); + } + println!( + "time_tag_keys_with_pred ran 
{:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + // This is for a performance experiment where I wanted to show the performance // change as more columns are grouped on. // diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 61081d2de2..808f0e7415 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1123,16 +1123,16 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - pub fn row_ids_eq(&self, value: Option<&Scalar<'_>>) -> Option { + pub fn row_ids_eq(&self, value: Option>) -> Option { let value = match value { Some(v) => v, None => return None, }; - if !self.maybe_contains(value) { + if !self.maybe_contains(&value) { return None; } - self.row_ids(value, std::cmp::Ordering::Equal) + self.row_ids(&value, std::cmp::Ordering::Equal) } pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { @@ -1249,6 +1249,18 @@ impl Column { } } } + + // great catchy name... This determines as efficiently as possible if the + // column contains a non-null value in at least one of the provided row + // ids. + // + // row_ids *must* be in ascending order. + pub fn has_non_null_value_in_row_ids(&self, row_ids: &[usize]) -> bool { + match self { + Column::String(c) => c.data.has_non_null_value_in_row_ids(row_ids), + _ => unreachable!("not supported at the moment"), + } + } } impl std::fmt::Display for Column { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e99f70cbfa..129fb68fab 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -107,11 +107,11 @@ where /// /// encoded_values should not be called on nullable columns. fn encoded_values(&self, row_ids: &[usize]) -> Vec { - panic!("encoded_values not implemented yet"); + todo!(); } fn all_encoded_values(&self) -> Vec { - panic!("all_encoded_values not implemented yet"); + todo!(); } // TODO(edd): problem here is returning a slice because we need to own the @@ -774,6 +774,58 @@ impl DictionaryRLE { out } + pub fn has_non_null_value_in_row_ids(&self, row_ids: &[usize]) -> bool { + let null_encoded_value = self.entry_index.get(&None); + if null_encoded_value.is_none() { + // there are no NULL entries in this encoded column so return true + // as soon a row_id is found that's < the number of rows encoded in + // the column. + for &id in row_ids { + if (id as u64) < self.total { + return true; + } + } + return false; + } + let null_encoded_value = *null_encoded_value.unwrap(); + + // Return true if there exists an encoded value at any of the row ids + // that is not equal to `null_encoded_value`. In such a case the column + // contains a non-NULL value at one of the row ids. + let mut curr_logical_row_id = 0; + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_encoded_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + for &row_id in row_ids { + if (row_id as u64) >= self.total { + continue; // can't possibly have a value at this row id. + } + + while curr_logical_row_id + curr_entry_rl <= row_id as u64 { + // this encoded entry does not cover the row we need. + // move on to next encoded id + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_encoded_id = res.0; + curr_entry_rl = res.1; + } + // TODO(edd): deal with this properly. + None => panic!("shouldn't get here"), + } + } + + // this entry covers the row_id we want. 
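+            // (A run holding any dictionary id other than the NULL entry's id
+            // proves a real value exists at this row, so we can return early
+            // without materialising anything.)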
+ if curr_encoded_id != null_encoded_value { + return true; + } + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + false + } + /// Return the decoded value for an encoded ID. /// /// Panics if there is no decoded value for the provided id @@ -1094,6 +1146,52 @@ mod test { assert_eq!(results, exp); } + #[test] + fn dict_rle_has_value_no_null() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west, 3); + drle.push_additional(east, 2); + drle.push_additional(north, 4); + + // w,w,w,e,e,n,n,n,n + // 0 1 2 3 4 5 6 7 8 + assert_eq!(drle.has_non_null_value_in_row_ids(&[0]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[1, 3]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[8]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[12, 132]), false); + } + + #[test] + fn dict_rle_has_value() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(None, 1); + drle.push_additional(east, 2); + drle.push_additional(north, 4); + drle.push_additional(None, 4); + drle.push_additional(west, 3); + + // w,w,w,?,e,e,n,n,n,n, ?, ?, ?, ?, w, w, w + // 0 1 2 3 4 5 6 7 8 9 10 11, 12, 13, 14, 15, 16 + assert_eq!(drle.has_non_null_value_in_row_ids(&[0]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[2, 3]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[2, 3]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[3, 4, 10]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[16, 19]), true); + + assert_eq!(drle.has_non_null_value_in_row_ids(&[3]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[3, 10]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[17]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[17, 19]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[12, 19]), false); + } + #[test] fn dict_rle_values() { let mut drle = super::DictionaryRLE::new(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 99cf527d0b..6d3974434e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet}; use super::column; use super::column::{AggregateType, Column}; @@ -7,6 +7,23 @@ use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. const THREADS: usize = 16; +/// ColumnType describes the logical type a column can have. 
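+/// Tag columns hold dictionary-encoded string values, field columns hold
+/// measured (field) values, and the time column holds timestamps.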
+pub enum ColumnType { + Tag(column::Column), + Field(column::Column), + Time(column::Column), +} + +impl ColumnType { + fn num_rows(&self) -> usize { + match &self { + ColumnType::Tag(c) => c.num_rows(), + ColumnType::Field(c) => c.num_rows(), + ColumnType::Time(c) => c.num_rows(), + } + } +} + #[derive(Debug)] pub struct Schema { _ref: SchemaRef, @@ -51,7 +68,9 @@ pub struct Segment { meta: SegmentMetaData, // Columns within a segment - columns: Vec, + columns: Vec, + + tag_column_idxs: Vec, // todo(edd): add vectors to each type time_column_idx: usize, } @@ -61,35 +80,46 @@ impl Segment { Self { meta: SegmentMetaData::new(rows, schema), columns: Vec::with_capacity(cols), + tag_column_idxs: vec![], time_column_idx: 0, } } - pub fn add_column(&mut self, name: &str, c: column::Column) { + pub fn add_column(&mut self, name: &str, ct: ColumnType) { assert_eq!( self.meta.rows, - c.num_rows(), + ct.num_rows(), "Column {:?} has {:?} rows but wanted {:?}", name, - c.num_rows(), + ct.num_rows(), self.meta.rows ); - // TODO(edd) yuk - if name == "time" { - if let column::Column::Integer(ts) = &c { - // Right now assumption is ts column has some non-null values - self.meta.time_range = ts.column_range().unwrap(); - } else { - panic!("incorrect column type for time"); - } - self.time_column_idx = self.columns.len(); - } - // validate column doesn't already exist in segment assert!(!self.meta.column_names.contains(&name.to_owned())); self.meta.column_names.push(name.to_owned()); - self.columns.push(c); + + match ct { + ColumnType::Time(c) => { + assert_eq!(name, "time"); + + if let Column::Integer(ts) = &c { + // Right now assumption is ts column has some non-null values + self.meta.time_range = ts.column_range().unwrap(); + } else { + panic!("incorrect column type for time"); + } + self.time_column_idx = self.columns.len(); + self.columns.push(c); + } + ColumnType::Tag(c) => { + self.tag_column_idxs.push(self.columns.len()); + self.columns.push(c); + } + ColumnType::Field(c) => { + self.columns.push(c); + } + } } pub fn num_rows(&self) -> usize { @@ -101,7 +131,7 @@ impl Segment { } /// column returns the column with name - pub fn column(&self, name: &str) -> Option<&column::Column> { + pub fn column(&self, name: &str) -> Option<&Column> { if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { return self.columns.get(*id); } @@ -225,7 +255,7 @@ impl Segment { pub fn aggregate_by_group_with_hash<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, @@ -464,7 +494,7 @@ impl Segment { pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -609,7 +639,7 @@ impl Segment { pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -859,7 +889,7 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range @@ -877,7 +907,7 @@ impl Segment { fn 
filter_by_predicates_eq_time( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option<&column::Scalar<'_>>)>, + predicates: Vec<(&str, Option>)>, ) -> Option { // Get all row_ids matching the time range: // @@ -915,7 +945,7 @@ impl Segment { // meta row_ids bitmap. fn filter_by_predicates_eq_no_time( &self, - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], ) -> Option { if predicates.is_empty() { // In this case there are no predicates provided and we have no time @@ -930,7 +960,7 @@ impl Segment { for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { // TODO(edd): rework this clone - match c.row_ids_eq(*col_pred_value) { + match c.row_ids_eq(col_pred_value.clone()) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -959,7 +989,7 @@ impl Segment { pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], group_column: &String, aggregates: &Vec<(String, column::AggregateType)>, ) -> BTreeMap)>> { @@ -1021,6 +1051,59 @@ impl Segment { } grouped_results } + + pub fn tag_keys( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + ) -> BTreeSet { + let (seg_min, seg_max) = self.meta.time_range; + if predicates.is_empty() && time_range.0 <= seg_min && time_range.1 > seg_max { + // the segment is completely overlapped by the time range of query, + // and there are no predicates + todo!("fast path") + } + + let pred_vec = predicates + .iter() + .map(|p| (p.0, Some(column::Scalar::String(p.1)))) + .collect::>(); + + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + filtered_row_ids = row_ids; + } else { + return BTreeSet::new(); // no matching rows for predicate + time range + } + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); + let mut results = BTreeSet::new(); + + // any columns that are in predicate set using equality predicates should + // be automatically included in results. + // + // TODO(edd): when predicates get more complicated it's likely this + // assumption will be a hard one to make. + for (col, _) in predicates { + results.insert(String::from(*col)); + } + + // now check if any of the other tag columns have a non-null value for + // any of the filtered ids. 
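+        // (This is the expensive path: each remaining tag column is probed
+        // with the materialised row ids, short-circuiting on the first
+        // non-null value found.)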
+ for &i in &self.tag_column_idxs { + let col = &self.columns[i]; + if col.has_non_null_value_in_row_ids(&filtered_row_ids_vec) { + results.insert(self.column_names().get(i).unwrap().clone()); + } + } + + results + } } impl std::fmt::Display for Segment { @@ -1111,7 +1194,7 @@ impl<'a> Segments<'a> { pub fn read_filter_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], select_columns: Vec, ) -> BTreeMap> { let (min, max) = time_range; @@ -1147,7 +1230,7 @@ impl<'a> Segments<'a> { pub fn read_group_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, @@ -1197,7 +1280,7 @@ impl<'a> Segments<'a> { fn read_group_eq_hash( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, @@ -1283,7 +1366,7 @@ impl<'a> Segments<'a> { fn read_group_eq_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, @@ -1524,6 +1607,28 @@ impl<'a> Segments<'a> { panic!("time column wrong type!"); } } + + pub fn tag_keys( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + ) -> BTreeSet { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + let mut columns = BTreeSet::new(); + + for segment in &self.segments { + if !segment.meta.overlaps_time_range(min, max) { + continue; // segment doesn't have time range + } + columns.append(&mut segment.tag_keys(time_range, predicates)); + } + + columns + } } #[derive(Debug)] From 3963cf6cce59ea03451dd892b958f81ef0376671 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 22 Sep 2020 14:57:44 +0100 Subject: [PATCH 63/73] perf: improve performance of tag keys --- delorean_mem_qe/src/bin/main.rs | 10 ++++---- delorean_mem_qe/src/column.rs | 6 ++--- delorean_mem_qe/src/segment.rs | 41 ++++++++++++++++++++++++++------- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e6251c0dd4..b1b5768a0e 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -122,10 +122,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 360 { - i += 1; - continue; - } + // if i < 360 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -612,7 +612,7 @@ fn time_window_agg_count(store: &Store) { // // SHOW TAG KEYS WHERE time >= x and time < y AND "env" = 'prod01-eu-central-1' fn time_tag_keys_with_pred(store: &Store) { - let repeat = 1000000; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut track = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 808f0e7415..15121f1070 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1123,16 +1123,16 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... 
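    // (The change below switches `row_ids_eq` to borrow its scalar, so a
    // caller can reuse one `Option<Scalar<'_>>` across segments instead of
    // cloning it for every call.)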
- pub fn row_ids_eq(&self, value: Option>) -> Option { + pub fn row_ids_eq(&self, value: &Option>) -> Option { let value = match value { Some(v) => v, None => return None, }; - if !self.maybe_contains(&value) { + if !self.maybe_contains(value) { return None; } - self.row_ids(&value, std::cmp::Ordering::Equal) + self.row_ids(value, std::cmp::Ordering::Equal) } pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 6d3974434e..0ef941ad8f 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -916,11 +916,12 @@ impl Segment { &column::Scalar::Integer(time_range.0), &column::Scalar::Integer(time_range.1), )?; + log::debug!("time col bitmap contains {:?} values out of {:?} rows. requested range was {:?}, meta range is {:?}",bm.cardinality(),self.num_rows(), time_range, self.meta.time_range); // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(col_pred_value) { + match c.row_ids_eq(&col_pred_value) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -959,8 +960,7 @@ impl Segment { // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - // TODO(edd): rework this clone - match c.row_ids_eq(col_pred_value.clone()) { + match c.row_ids_eq(col_pred_value) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -1056,7 +1056,23 @@ impl Segment { &self, time_range: (i64, i64), predicates: &[(&str, &str)], - ) -> BTreeSet { + exclude_columns: &BTreeSet, + ) -> Option> { + // first check if we have any columns not in the exclusion set. + let mut all_excluded = true; + for &i in &self.tag_column_idxs { + let col_name = self.column_names().get(i).unwrap(); + if !exclude_columns.contains(col_name) { + all_excluded = false; + break; + } + } + + if all_excluded { + log::debug!("skipping segment as all tag columns excluded"); + return None; // we don't have any tag columns to offer. + } + let (seg_min, seg_max) = self.meta.time_range; if predicates.is_empty() && time_range.0 <= seg_min && time_range.1 > seg_max { // the segment is completely overlapped by the time range of query, @@ -1073,7 +1089,7 @@ impl Segment { if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { filtered_row_ids = row_ids; } else { - return BTreeSet::new(); // no matching rows for predicate + time range + return None; // no matching rows for predicate + time range } let filtered_row_ids_vec = filtered_row_ids @@ -1097,12 +1113,18 @@ impl Segment { // any of the filtered ids. 
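        // (Tag columns the caller already has are skipped below via the
        // `exclude_columns` check, so later segments avoid re-probing them.)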
for &i in &self.tag_column_idxs { let col = &self.columns[i]; + let col_name = self.column_names().get(i).unwrap(); + + if exclude_columns.contains(col_name) { + continue; + } + if col.has_non_null_value_in_row_ids(&filtered_row_ids_vec) { - results.insert(self.column_names().get(i).unwrap().clone()); + results.insert(col_name.clone()); } } - results + Some(results) } } @@ -1624,7 +1646,10 @@ impl<'a> Segments<'a> { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range } - columns.append(&mut segment.tag_keys(time_range, predicates)); + let segment_columns = segment.tag_keys(time_range, predicates, &columns); + if let Some(mut result) = segment_columns { + columns.append(&mut result); + } } columns From d0f3cae9b317c71e1aacd08378333a72ffa1e99e Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 24 Sep 2020 14:30:27 +0100 Subject: [PATCH 64/73] feat: add tag values schema API --- Cargo.lock | 52 +++++------- delorean_mem_qe/src/bin/main.rs | 33 +++++++- delorean_mem_qe/src/column.rs | 40 +++++++++ delorean_mem_qe/src/encoding.rs | 78 +++++++++++++++++- delorean_mem_qe/src/segment.rs | 139 ++++++++++++++++++++++++++++++++ 5 files changed, 307 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63e340c752..7d7c6a3a41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,11 +87,7 @@ checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" -<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" -======= -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" ->>>>>>> 27b73c4... refactor: add encoding trait dependencies = [ "chrono", "csv", @@ -111,7 +107,7 @@ dependencies = [ [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" dependencies = [ "chrono", "csv", @@ -669,27 +665,7 @@ dependencies = [ [[package]] name = "datafusion" version = "2.0.0-SNAPSHOT" -<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" -======= -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "clap", - "crossbeam", - "fnv", - "num_cpus", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "paste", - "rustyline", - "sqlparser", -] - -[[package]] -name = "datafusion" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" ->>>>>>> 27b73c4... 
refactor: add encoding trait dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "chrono", @@ -703,6 +679,22 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "clap", + "crossbeam", + "fnv", + "num_cpus", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "paste", + "rustyline", + "sqlparser", +] + [[package]] name = "delorean" version = "0.1.0" @@ -713,11 +705,7 @@ dependencies = [ "clap", "criterion", "csv", -<<<<<<< HEAD "delorean_arrow", -======= - "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", ->>>>>>> 27b73c4... refactor: add encoding trait "delorean_generated_types", "delorean_ingest", "delorean_line_parser", @@ -763,7 +751,7 @@ name = "delorean_arrow" version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "datafusion", + "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", ] @@ -2111,9 +2099,9 @@ dependencies = [ [[package]] name = "parquet" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "brotli", "byteorder", "chrono", diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index b1b5768a0e..659e1028e9 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -72,7 +72,8 @@ fn main() { // time_group_by_multi_agg_count(&store); // time_group_by_multi_agg_sorted_count(&store); // time_window_agg_count(&store); - time_tag_keys_with_pred(&store); + // time_tag_keys_with_pred(&store); + time_tag_values_with_pred(&store); // time_group_by_different_columns(&store); } @@ -621,7 +622,7 @@ fn time_tag_keys_with_pred(store: &Store) { let columns = segments.tag_keys( (1588834080000000, 1590044410000000), - &[("env", "prod01-eu-central-1"), ("method", "GET")], + &[("env", "prod01-eu-central-1")], ); total_time += now.elapsed(); @@ -637,6 +638,34 @@ fn time_tag_keys_with_pred(store: &Store) { ); } +// +// SHOW TAG VALUES ON "host", "method" WHERE time >= x and time < y AND "env" = 'prod01-us-west-1' +fn time_tag_values_with_pred(store: &Store) { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let tag_values = segments.tag_values( + (1588834080000000, 1590044410000000), + &[("env", "prod01-us-west-2")], + 
&["host".to_string(), "method".to_string()], + ); + + total_time += now.elapsed(); + track += tag_values.len(); + } + println!( + "time_tag_values_with_pred ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + // This is for a performance experiment where I wanted to show the performance // change as more columns are grouped on. // diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 15121f1070..ee63e51201 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1,7 +1,14 @@ +use std::collections::BTreeSet; use std::convert::From; use super::encoding; +#[derive(Debug)] +pub enum Set<'a> { + String(BTreeSet<&'a std::string::String>), + Integer(BTreeSet), +} + #[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Value<'a> { Null, @@ -1261,6 +1268,39 @@ impl Column { _ => unreachable!("not supported at the moment"), } } + + /// This returns the distinct set of values in the column from the set of + /// rows provided. + /// + /// NULL values are not included in the returned set even if present in the + /// column at provided rows. + /// + /// row_ids *must* be in ascending order. + pub fn distinct_values(&self, row_ids: &[usize]) -> Set<'_> { + match self { + Column::String(c) => Set::String(c.data.distinct_values(row_ids)), + _ => unreachable!("not supported at the moment"), + } + } + + /// Returns true if the column contains any values other than those in + /// `values`. + pub fn contains_other_values(&self, values: &BTreeSet<&std::string::String>) -> bool { + match self { + Column::String(c) => { + // TODO(edd): + // had problems with ref inside of enum Set variant. + + // if let Set::String(v) = values { + c.data.contains_other_values(values) + // } else { + // panic!("incompatible set with column type"); + // } + // Set::String(c.data.distinct_values(row_ids)) + } + _ => unreachable!("not supported at the moment"), + } + } } impl std::fmt::Display for Column { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 129fb68fab..09467bea63 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::iter; use std::mem::size_of; @@ -774,6 +774,82 @@ impl DictionaryRLE { out } + /// Returns the unique set of values encoded at each of the provided ids. + /// NULL values are not returned. + pub fn distinct_values(&self, row_ids: &[usize]) -> BTreeSet<&String> { + // TODO(edd): can improve on this if we know encoded data is totally + // ordered. + let mut encoded_values = HashSet::new(); + + let mut curr_logical_row_id = 0; + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_entry_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + 'by_row: for row_id in row_ids { + while curr_logical_row_id + curr_entry_rl <= *row_id as u64 { + // this encoded entry does not cover the row we need. 
+ // move on to next entry + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_entry_id = res.0; + curr_entry_rl = res.1; + } + None => panic!("shouldn't get here"), + } + } + + // track encoded value + encoded_values.insert(curr_entry_id); + if encoded_values.len() == self.index_entry.len() { + // all distinct values have been read + break 'by_row; + } + + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + assert!(encoded_values.len() <= self.index_entry.len()); + + // Finally, materialise the decoded values for the encoded set. + let mut results = BTreeSet::new(); + for id in encoded_values.iter() { + let decoded_value = self.index_entry.get(id).unwrap(); + if let Some(value) = decoded_value { + results.insert(value); + } + } + results + } + + /// Returns true if the encoding contains values other than those provided in + /// `values`. + pub fn contains_other_values(&self, values: &BTreeSet<&String>) -> bool { + let mut encoded_values = self.entry_index.len(); + if self.entry_index.contains_key(&None) { + encoded_values -= 1; + } + + if encoded_values > values.len() { + return true; + } + + for key in self.entry_index.keys() { + match key { + Some(key) => { + if !values.contains(key) { + return true; + } + } + None => continue, // skip NULL + } + } + false + } + + /// Determines if the encoded data contains at least one non-null value at + /// any of the provided row ids. pub fn has_non_null_value_in_row_ids(&self, row_ids: &[usize]) -> bool { let null_encoded_value = self.entry_index.get(&None); if null_encoded_value.is_none() { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 0ef941ad8f..fdde84f430 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -130,6 +130,11 @@ impl Segment { &self.meta.column_names } + /// Determines if the segment contains a column with the provided name. + pub fn has_column(&self, name: &String) -> bool { + self.meta.column_names.contains(name) + } + /// column returns the column with name pub fn column(&self, name: &str) -> Option<&Column> { if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { @@ -1126,6 +1131,89 @@ impl Segment { Some(results) } + + pub fn tag_values( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + tag_keys: &[String], + excluded_tag_values: &BTreeMap>, + ) -> Option>> { + // first check if we have any columns that should be processed. + let mut have_some_cols = false; + for &i in &self.tag_column_idxs { + let col_name = self.column_names().get(i).unwrap(); + if tag_keys.contains(col_name) { + have_some_cols = true; + break; + } + } + + if !have_some_cols { + log::debug!("skipping segment because no columns for tag keys present"); + return None; // we don't have any tag columns to offer. 
+ } + + let (seg_min, seg_max) = self.meta.time_range; + if predicates.is_empty() && time_range.0 <= seg_min && time_range.1 > seg_max { + // the segment is completely overlapped by the time range of query, + // and there are no predicates + todo!("fast path") + } + + let pred_vec = predicates + .iter() + .map(|p| (p.0, Some(column::Scalar::String(p.1)))) + .collect::>(); + + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + filtered_row_ids = row_ids; + } else { + return None; // no matching rows for predicate + time range + } + + let mut results = BTreeMap::new(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); + + for &i in &self.tag_column_idxs { + let col = &self.columns[i]; + let col_name = self.column_names().get(i).unwrap(); + + if !tag_keys.contains(col_name) { + continue; + } + + // if !col.contains_other_values(&column::Set::String( + // *excluded_tag_values.get(col_name).unwrap(), + // )) { + // log::debug!("skipping!!"); + // continue; + // } + + if let Some(exclude_tag_values) = excluded_tag_values.get(col_name) { + if !col.contains_other_values(exclude_tag_values) { + log::debug!("skipping!!"); + continue; + } + } + + if let column::Set::String(values) = col.distinct_values(&filtered_row_ids_vec) { + log::debug!("distinct values: {:?}", values); + results.insert(col_name, values); + } else { + unreachable!("only works on tag columns"); + } + } + + Some(results) + } } impl std::fmt::Display for Segment { @@ -1630,6 +1718,8 @@ impl<'a> Segments<'a> { } } + /// Returns the distinct set of tag keys (column names) matching the provided + /// predicates and time range. pub fn tag_keys( &self, time_range: (i64, i64), @@ -1654,6 +1744,55 @@ impl<'a> Segments<'a> { columns } + + /// Returns the distinct set of tag values (column values) for each provided + /// tag key, where each returned value lives in a row matching the provided + /// predicates and time range. + /// + /// As a special case, if no values are provided for `tag_keys` then all + /// tag key-values are returned for the segments. 
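+    ///
+    /// A sketch of a call, mirroring the `time_tag_values_with_pred`
+    /// benchmark in this patch (values illustrative):
+    ///
+    ///     let values = segments.tag_values(
+    ///         (1588834080000000, 1590044410000000),
+    ///         &[("env", "prod01-us-west-2")],
+    ///         &["host".to_string(), "method".to_string()],
+    ///     );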
+ pub fn tag_values( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + tag_keys: &[String], + ) -> BTreeMap> { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + let mut results: BTreeMap> = BTreeMap::new(); + + for segment in &self.segments { + if !segment.meta.overlaps_time_range(min, max) { + continue; // segment doesn't have time range + } + + let col_names = if tag_keys.is_empty() { + segment.column_names() + } else { + tag_keys + }; + + let segment_values = segment.tag_values(time_range, predicates, col_names, &results); + match segment_values { + Some(values) => { + for (tag_key, mut tag_values) in values { + if !results.contains_key(tag_key) { + results.insert(tag_key.clone(), tag_values); + } else { + let all_values = results.get_mut(tag_key).unwrap(); + all_values.append(&mut tag_values); + } + } + } + None => continue, + } + } + + results + } } #[derive(Debug)] From c42d2dcd794d2006a7cc17f6d36cd2d33173a3d1 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 24 Sep 2020 15:41:48 +0100 Subject: [PATCH 65/73] refactor: rebase with delorean_arrow --- Cargo.lock | 73 +--- Cargo.toml | 2 +- delorean_mem_qe/Cargo.toml | 4 +- delorean_mem_qe/src/adapter.rs | 575 ++++++++++++++++---------------- delorean_mem_qe/src/bin/main.rs | 87 +++-- delorean_mem_qe/src/column.rs | 17 +- delorean_mem_qe/src/encoding.rs | 5 +- delorean_mem_qe/src/lib.rs | 2 +- delorean_mem_qe/src/segment.rs | 6 +- 9 files changed, 355 insertions(+), 416 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7d7c6a3a41..a6f96b0298 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -104,26 +104,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "arrow" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "chrono", - "csv", - "flatbuffers", - "hex", - "indexmap", - "lazy_static", - "num 0.3.0", - "prettytable-rs", - "rand", - "regex", - "serde", - "serde_derive", - "serde_json", -] - [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" @@ -673,23 +653,7 @@ dependencies = [ "crossbeam", "fnv", "num_cpus", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "paste", - "rustyline", - "sqlparser", -] - -[[package]] -name = "datafusion" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "clap", - "crossbeam", - "fnv", - "num_cpus", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "parquet", "paste", "rustyline", "sqlparser", @@ -751,8 +715,8 @@ name = "delorean_arrow" version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", + "datafusion", + "parquet", ] [[package]] @@ -800,17 +764,15 @@ dependencies = [ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ - "arrow 2.0.0-SNAPSHOT 
(git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "chrono", "croaring", "crossbeam", - "datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "delorean_arrow", "delorean_table", "env_logger", "heapsize", "human_format", "log", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "snafu", ] @@ -2096,25 +2058,6 @@ dependencies = [ "zstd", ] -[[package]] -name = "parquet" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "brotli", - "byteorder", - "chrono", - "flate2", - "lz4", - "num-bigint 0.3.0", - "parquet-format", - "serde_json", - "snap", - "thrift", - "zstd", -] - [[package]] name = "parquet-format" version = "2.6.1" @@ -2877,9 +2820,9 @@ checksum = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f" [[package]] name = "snafu" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f5aed652511f5c9123cf2afbe9c244c29db6effa2abb05c866e965c82405ce" +checksum = "9c4e6046e4691afe918fd1b603fd6e515bcda5388a1092a9edbada307d159f09" dependencies = [ "doc-comment", "futures-core", @@ -2889,9 +2832,9 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebf8f7d5720104a9df0f7076a8682024e958bba0fe9848767bb44f251f3648e9" +checksum = "7073448732a89f2f3e6581989106067f403d378faeafb4a50812eb814170d3e5" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index fbc65ec72c..59c885c464 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,7 +69,7 @@ tracing = "0.1" tracing-futures="0.2.4" http = "0.2.0" -snafu = "0.6.2" +snafu = "0.6.9" libflate = "1.0.0" [dev-dependencies] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 742df5ba19..5c0fbc3f37 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -8,10 +8,8 @@ edition = "2018" [dependencies] +delorean_arrow = { path = "../delorean_arrow" } delorean_table = { path = "../delorean_table" } -arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } -parquet = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } -datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } heapsize = "0.4.2" snafu = "0.6.8" croaring = "0.4.5" diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs index c2537fa53c..eeb2523a5e 100644 --- a/delorean_mem_qe/src/adapter.rs +++ b/delorean_mem_qe/src/adapter.rs @@ -1,330 +1,331 @@ //! 
Code for interfacing and running queries in DataFusion -use crate::Store; -use arrow::{ - datatypes::{Schema, SchemaRef}, - record_batch::{RecordBatch, RecordBatchReader}, - util::pretty, -}; -use datafusion::prelude::*; -use datafusion::{ - datasource::TableProvider, - execution::{ - context::ExecutionContextState, - physical_plan::{common::RecordBatchIterator, ExecutionPlan, Partition}, - }, - logicalplan::{make_logical_plan_node, Expr, LogicalPlan}, - lp::LogicalPlanNode, - optimizer::utils, -}; +// use crate::Store; +// use delorean_arrow::arrow::{ +// datatypes::{Schema, SchemaRef}, +// record_batch::{RecordBatch, RecordBatchReader}, +// util::pretty, +// }; +// use delorean_arrow::datafusion::prelude::*; +// use delorean_arrow::datafusion::{ +// datasource::TableProvider, +// execution::{ +// context::ExecutionContextState, +// physical_plan::{common::RecordBatchIterator, ExecutionPlan, Partition}, +// }, +// logicalplan::{make_logical_plan_node, Expr, LogicalPlan}, +// lp::LogicalPlanNode, +// optimizer::utils, +// }; -use crate::column; -use std::{ - fmt, - sync::{Arc, Mutex}, -}; +// use crate::column; +// use std::{ +// fmt, +// sync::{Arc, Mutex}, +// }; -/// Wrapper to adapt a Store to a DataFusion "TableProvider" -- -/// eventually we could also implement this directly on Store -pub struct StoreTableSource { - store: Arc, -} +// Wrapper to adapt a Store to a DataFusion "TableProvider" -- +// eventually we could also implement this directly on Store +// pub struct StoreTableSource { +// store: Arc, +// } -impl<'a> StoreTableSource { - pub fn new(store: Arc) -> Self { - Self { store } - } -} +// impl<'a> StoreTableSource { +// pub fn new(store: Arc) -> Self { +// Self { store } +// } +// } -impl TableProvider for StoreTableSource { - /// Get a reference to the schema for this table - fn schema(&self) -> SchemaRef { - self.store.schema() - } +// impl TableProvider for StoreTableSource { +// /// Get a reference to the schema for this table +// fn schema(&self) -> SchemaRef { +// self.store.schema() +// } - /// Perform a scan of a table and return a sequence of iterators over the data (one - /// iterator per partition) - fn scan( - &self, - _projection: &Option>, - _batch_size: usize, - ) -> datafusion::error::Result>> { - unimplemented!("scan not yet implemented"); - } -} +// /// Perform a scan of a table and return a sequence of iterators over the data (one +// /// iterator per partition) +// fn scan( +// &self, +// _projection: &Option>, +// _batch_size: usize, +// ) -> delorean_arrow::datafusion::error::Result>> { +// unimplemented!("scan not yet implemented"); +// } +// } -/// Prototype of how a Delorean query engine, built on top of -/// DataFusion, but using specialized column store operators might -/// look like. -/// -/// Data from the Segments in the `store` are visible in DataFusion -/// as a table ("measurement") in this prototype. -pub struct DeloreanQueryEngine { - ctx: ExecutionContext, - store: Arc, -} +// /// Prototype of how a Delorean query engine, built on top of +// /// DataFusion, but using specialized column store operators might +// /// look like. +// /// +// /// Data from the Segments in the `store` are visible in DataFusion +// /// as a table ("measurement") in this prototype. 
+// pub struct DeloreanQueryEngine { +// ctx: ExecutionContext, +// store: Arc, +// } -impl DeloreanQueryEngine { - pub fn new(store: Arc) -> Self { - let start = std::time::Instant::now(); - let mut ctx = ExecutionContext::new(); - let source = StoreTableSource::new(store.clone()); - let source = Box::new(source); - ctx.register_table("measurement", source); - println!("Completed setup in {:?}", start.elapsed()); - DeloreanQueryEngine { ctx, store } - } +// impl DeloreanQueryEngine { +// pub fn new(store: Arc) -> Self { +// let start = std::time::Instant::now(); +// let mut ctx = ExecutionContext::new(); +// let source = StoreTableSource::new(store.clone()); +// let source = Box::new(source); +// ctx.register_table("measurement", source); +// println!("Completed setup in {:?}", start.elapsed()); +// DeloreanQueryEngine { ctx, store } +// } - // Run the specified SQL and return the number of records matched - pub fn run_sql(&mut self, sql: &str) -> usize { - let plan = self - .ctx - .create_logical_plan(sql) - .expect("Creating the logical plan"); +// // Run the specified SQL and return the number of records matched +// pub fn run_sql(&mut self, sql: &str) -> usize { +// let plan = self +// .ctx +// .create_logical_plan(sql) +// .expect("Creating the logical plan"); - //println!("Created logical plan:\n{:?}", plan); - let plan = self.rewrite_to_segment_scan(&plan); - //println!("Rewritten logical plan:\n{:?}", plan); +// //println!("Created logical plan:\n{:?}", plan); +// let plan = self.rewrite_to_segment_scan(&plan); +// //println!("Rewritten logical plan:\n{:?}", plan); - match self.ctx.collect_plan(&plan) { - Err(err) => { - println!("Error running query: {:?}", err); - 0 - } - Ok(results) => { - if results.is_empty() { - //println!("Empty result returned"); - 0 - } else { - pretty::print_batches(&results).expect("printing"); - results.iter().map(|b| b.num_rows()).sum() - } - } - } - } +// match self.ctx.collect_plan(&plan) { +// Err(err) => { +// println!("Error running query: {:?}", err); +// 0 +// } +// Ok(results) => { +// if results.is_empty() { +// //println!("Empty result returned"); +// 0 +// } else { +// pretty::print_batches(&results).expect("printing"); +// results.iter().map(|b| b.num_rows()).sum() +// } +// } +// } +// } - /// Specialized optimizer pass that combines a `TableScan` and a `Filter` - /// together into a SegementStore with the predicates. - /// - /// For example, given this input: - /// - /// Projection: #env, #method, #host, #counter, #time - /// Filter: #time GtEq Int64(1590036110000000) - /// TableScan: measurement projection=None - /// - /// The following plan would be produced - /// Projection: #env, #method, #host, #counter, #time - /// SegmentScan: measurement projection=None predicate=: #time GtEq Int64(1590036110000000) - /// - fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan { - if let LogicalPlan::Filter { predicate, input } = plan { - // see if the input is a TableScan - if let LogicalPlan::TableScan { .. } = **input { - return make_logical_plan_node(Box::new(SegmentScan::new( - self.store.clone(), - predicate.clone(), - ))); - } - } +// /// Specialized optimizer pass that combines a `TableScan` and a `Filter` +// /// together into a SegementStore with the predicates. 
+// /// +// /// For example, given this input: +// /// +// /// Projection: #env, #method, #host, #counter, #time +// /// Filter: #time GtEq Int64(1590036110000000) +// /// TableScan: measurement projection=None +// /// +// /// The following plan would be produced +// /// Projection: #env, #method, #host, #counter, #time +// /// SegmentScan: measurement projection=None predicate=: #time GtEq Int64(1590036110000000) +// /// +// fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan { +// if let LogicalPlan::Filter { predicate, input } = plan { +// // see if the input is a TableScan +// if let LogicalPlan::TableScan { .. } = **input { +// return make_logical_plan_node(Box::new(SegmentScan::new( +// self.store.clone(), +// predicate.clone(), +// ))); +// } +// } - // otherwise recursively apply - let optimized_inputs = utils::inputs(&plan) - .iter() - .map(|input| self.rewrite_to_segment_scan(input)) - .collect(); +// // otherwise recursively apply +// let optimized_inputs = utils::inputs(&plan) +// .iter() +// .map(|input| self.rewrite_to_segment_scan(input)) +// .collect(); - return utils::from_plan(plan, &utils::expressions(plan), &optimized_inputs) - .expect("Created plan"); - } -} +// return utils::from_plan(plan, &utils::expressions(plan), &optimized_inputs) +// .expect("Created plan"); +// } +// } -/// LogicalPlan node that serves as a scan of the segment store with optional predicates -struct SegmentScan { - /// The underlying Store - store: Arc, +// /// LogicalPlan node that serves as a scan of the segment store with optional predicates +// struct SegmentScan { +// /// The underlying Store +// store: Arc, - schema: SchemaRef, +// schema: SchemaRef, - /// The predicate to apply during the scan - predicate: Expr, -} +// /// The predicate to apply during the scan +// predicate: Expr, +// } -impl<'a> SegmentScan { - fn new(store: Arc, predicate: Expr) -> Self { - let schema = store.schema().clone(); +// impl<'a> SegmentScan { +// fn new(store: Arc, predicate: Expr) -> Self { +// let schema = store.schema().clone(); - SegmentScan { - store, - schema, - predicate, - } - } -} +// SegmentScan { +// store, +// schema, +// predicate, +// } +// } +// } -impl LogicalPlanNode for SegmentScan { - /// Return a reference to the logical plan's inputs - fn inputs(&self) -> Vec<&LogicalPlan> { - Vec::new() - } +// impl LogicalPlanNode for SegmentScan { +// /// Return a reference to the logical plan's inputs +// fn inputs(&self) -> Vec<&LogicalPlan> { +// Vec::new() +// } - /// Get a reference to the logical plan's schema - fn schema(&self) -> &Schema { - self.schema.as_ref() - } +// /// Get a reference to the logical plan's schema +// fn schema(&self) -> &Schema { +// self.schema.as_ref() +// } - /// returns all expressions (non-recursively) in the current logical plan node. - fn expressions(&self) -> Vec { - // The predicate expression gets absorbed by this node As - // there are no inputs, there are no exprs that operate on - // inputs - Vec::new() - } +// /// returns all expressions (non-recursively) in the current logical plan node. 
+// fn expressions(&self) -> Vec { +// // The predicate expression gets absorbed by this node As +// // there are no inputs, there are no exprs that operate on +// // inputs +// Vec::new() +// } - /// Write a single line human readable string to `f` for use in explain plan - fn format_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "SegmentScan: {:?} predicate {:?}", - self.store.as_ref() as *const Store, - self.predicate - ) - } +// /// Write a single line human readable string to `f` for use in explain plan +// fn format_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +// write!( +// f, +// "SegmentScan: {:?} predicate {:?}", +// self.store.as_ref() as *const Store, +// self.predicate +// ) +// } - /// Create a clone of this node. - /// - /// Note std::Clone needs a Sized type, so we must implement a - /// clone that creates a node with a known Size (i.e. Box) - // - fn dyn_clone(&self) -> Box { - Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) - } +// /// Create a clone of this node. +// /// +// /// Note std::Clone needs a Sized type, so we must implement a +// /// clone that creates a node with a known Size (i.e. Box) +// // +// fn dyn_clone(&self) -> Box { +// Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) +// } - /// Create a clone of this LogicalPlanNode with inputs and expressions replaced. - /// - /// Note that exprs and inputs are in the same order as the result - /// of self.inputs and self.exprs. - /// - /// So, clone_from_template(exprs).exprs() == exprs - fn clone_from_template( - &self, - exprs: &Vec, - inputs: &Vec, - ) -> Box { - assert_eq!(exprs.len(), 0, "no exprs expected"); - assert_eq!(inputs.len(), 0, "no inputs expected"); - Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) - } +// /// Create a clone of this LogicalPlanNode with inputs and expressions replaced. +// /// +// /// Note that exprs and inputs are in the same order as the result +// /// of self.inputs and self.exprs. +// /// +// /// So, clone_from_template(exprs).exprs() == exprs +// fn clone_from_template( +// &self, +// exprs: &Vec, +// inputs: &Vec, +// ) -> Box { +// assert_eq!(exprs.len(), 0, "no exprs expected"); +// assert_eq!(inputs.len(), 0, "no inputs expected"); +// Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) +// } - /// Create the corresponding physical scheplan for this node - fn create_physical_plan( - &self, - input_physical_plans: Vec>, - _ctx_state: Arc>, - ) -> datafusion::error::Result> { - assert_eq!(input_physical_plans.len(), 0, "Can not have inputs"); +// /// Create the corresponding physical scheplan for this node +// fn create_physical_plan( +// &self, +// input_physical_plans: Vec>, +// _ctx_state: Arc>, +// ) -> delorean_arrow::datafusion::error::Result> { +// assert_eq!(input_physical_plans.len(), 0, "Can not have inputs"); - // If this were real code, we would now progrmatically - // transform the DataFusion Expr into the specific form needed - // by the Segment. However, to save prototype time we just - // hard code it here instead - assert_eq!( - format!("{:?}", self.predicate), - "CAST(#time AS Int64) GtEq Int64(1590036110000000) And CAST(#time AS Int64) Lt Int64(1590040770000000) And #env Eq Utf8(\"prod01-eu-central-1\")" - ); +// // If this were real code, we would now progrmatically +// // transform the DataFusion Expr into the specific form needed +// // by the Segment. 
However, to save prototype time we just +// // hard code it here instead +// assert_eq!( +// format!("{:?}", self.predicate), +// "CAST(#time AS Int64) GtEq Int64(1590036110000000) And CAST(#time AS Int64) Lt Int64(1590040770000000) And #env Eq Utf8(\"prod01-eu-central-1\")" +// ); - let time_range = (1590036110000000, 1590040770000000); - let string_predicate = StringPredicate { - col_name: "env".into(), - value: "prod01-eu-central-1".into(), - }; +// let time_range = (1590036110000000, 1590040770000000); +// let string_predicate = StringPredicate { +// col_name: "env".into(), +// value: "prod01-eu-central-1".into(), +// }; - Ok(Arc::new(SegmentScanExec::new( - self.store.clone(), - time_range, - string_predicate, - ))) - } -} +// Ok(Arc::new(SegmentScanExec::new( +// self.store.clone(), +// time_range, +// string_predicate, +// ))) +// } +// } -#[derive(Debug, Clone)] -struct StringPredicate { - col_name: String, - value: String, -} +// #[derive(Debug, Clone)] +// struct StringPredicate { +// col_name: String, +// value: String, +// } -/// StoreScan execution node -#[derive(Debug)] -pub struct SegmentScanExec { - store: Arc, +// /// StoreScan execution node +// #[derive(Debug)] +// pub struct SegmentScanExec { +// store: Arc, - // Specialized predicates to apply - time_range: (i64, i64), - string_predicate: StringPredicate, -} +// // Specialized predicates to apply +// time_range: (i64, i64), +// string_predicate: StringPredicate, +// } -impl SegmentScanExec { - fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { - SegmentScanExec { - store, - time_range, - string_predicate, - } - } -} +// impl SegmentScanExec { +// fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { +// SegmentScanExec { +// store, +// time_range, +// string_predicate, +// } +// } +// } -impl ExecutionPlan for SegmentScanExec { - fn schema(&self) -> SchemaRef { - self.store.schema() - } +// impl ExecutionPlan for SegmentScanExec { +// fn schema(&self) -> SchemaRef { +// self.store.schema() +// } - fn partitions(&self) -> datafusion::error::Result>> { - let store = self.store.clone(); - Ok(vec![Arc::new(SegmentPartition { - store, - time_range: self.time_range, - string_predicate: self.string_predicate.clone(), - })]) - } -} +// fn partitions(&self) -> delorean_arrow::datafusion::error::Result>> { +// let store = self.store.clone(); +// Ok(vec![Arc::new(SegmentPartition { +// store, +// time_range: self.time_range, +// string_predicate: self.string_predicate.clone(), +// })]) +// } +// } -#[derive(Debug)] -struct SegmentPartition { - store: Arc, - time_range: (i64, i64), - string_predicate: StringPredicate, -} +// #[derive(Debug)] +// struct SegmentPartition { +// store: Arc, +// time_range: (i64, i64), +// string_predicate: StringPredicate, +// } -impl Partition for SegmentPartition { - fn execute( - &self, - ) -> datafusion::error::Result>> { - let combined_results: Vec> = vec![]; +// impl Partition for SegmentPartition { +// fn execute( +// &self, +// ) -> delorean_arrow::datafusion::error::Result>> +// { +// let combined_results: Vec> = vec![]; - let segments = self.store.segments(); +// let segments = self.store.segments(); - // prepare the string predicates in the manner Segments want them - let col_name = &self.string_predicate.col_name; - let scalar = column::Scalar::String(&self.string_predicate.value); +// // prepare the string predicates in the manner Segments want them +// let col_name = &self.string_predicate.col_name; +// let scalar = 
column::Scalar::String(&self.string_predicate.value); - // Here - let _columns = segments.read_filter_eq( - self.time_range, - &[(col_name, Some(scalar))], - vec![ - "env".to_string(), - "method".to_string(), - "host".to_string(), - "counter".to_string(), - "time".to_string(), - ], - ); +// // Here +// let _columns = segments.read_filter_eq( +// self.time_range, +// &[(col_name, Some(scalar))], +// vec![ +// "env".to_string(), +// "method".to_string(), +// "host".to_string(), +// "counter".to_string(), +// "time".to_string(), +// ], +// ); - // If we were implementing this for real, we would not convert - // `columns` into RecordBatches and feed them back out +// // If we were implementing this for real, we would not convert +// // `columns` into RecordBatches and feed them back out - Ok(Arc::new(Mutex::new(RecordBatchIterator::new( - self.store.schema().clone(), - combined_results, - )))) - } -} +// Ok(Arc::new(Mutex::new(RecordBatchIterator::new( +// self.store.schema().clone(), +// combined_results, +// )))) +// } +// } diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 659e1028e9..cda3563189 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -8,30 +8,21 @@ use std::{ sync::Arc, }; -use arrow::record_batch::{RecordBatch, RecordBatchReader}; -use arrow::{array, array::Array, datatypes, ipc}; - -use delorean_mem_qe::column; -use delorean_mem_qe::column::{AggregateType, Column}; -use delorean_mem_qe::segment::{ColumnType, GroupingStrategy, Schema, Segment}; -use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; -use parquet::arrow::arrow_reader::ArrowReader; - -// use snafu::ensure; use datatypes::TimeUnit; use snafu::Snafu; +use delorean_arrow::arrow::array::StringArrayOps; +use delorean_arrow::arrow::record_batch::{RecordBatch, RecordBatchReader}; +use delorean_arrow::arrow::{array, array::Array, datatypes, ipc}; +use delorean_arrow::parquet::arrow::arrow_reader::ArrowReader; +use delorean_mem_qe::column; +use delorean_mem_qe::column::{AggregateType, Column}; +use delorean_mem_qe::segment::{ColumnType, GroupingStrategy, Schema, Segment}; +use delorean_mem_qe::Store; +// use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; + #[derive(Snafu, Debug, Clone, Copy, PartialEq)] -pub enum Error { - // #[snafu(display(r#"Too many sort columns specified"#))] -// TooManyColumns, - -// #[snafu(display(r#"Same column specified as sort column multiple times"#))] -// RepeatedColumns { index: usize }, - -// #[snafu(display(r#"Specified column index is out bounds"#))] -// OutOfBoundsColumn { index: usize }, -} +pub enum Error {} fn format_size(sz: usize) -> String { human_format::Formatter::new().format(sz as f64) @@ -88,9 +79,11 @@ fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> path ); - let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); - let mut reader = - parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let parquet_reader = + delorean_arrow::parquet::file::reader::SerializedFileReader::new(r).unwrap(); + let mut reader = delorean_arrow::parquet::arrow::arrow_reader::ParquetFileArrowReader::new( + Rc::new(parquet_reader), + ); let batch_size = 60000; let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); build_store(record_batch_reader, store, sort_order) @@ -419,32 +412,32 @@ fn time_select_with_pred(store: &Store) { // // Use the hard coded timestamp values 1590036110000000, 1590040770000000 -fn 
time_datafusion_select_with_pred(store: Arc) { - let mut query_engine = DeloreanQueryEngine::new(store); +// fn time_datafusion_select_with_pred(store: Arc) { +// let mut query_engine = DeloreanQueryEngine::new(store); - let sql_string = r#"SELECT env, method, host, counter, time - FROM measurement - WHERE time::BIGINT >= 1590036110000000 - AND time::BIGINT < 1590040770000000 - AND env = 'prod01-eu-central-1' - "#; +// let sql_string = r#"SELECT env, method, host, counter, time +// FROM measurement +// WHERE time::BIGINT >= 1590036110000000 +// AND time::BIGINT < 1590040770000000 +// AND env = 'prod01-eu-central-1' +// "#; - let repeat = 100; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut track = 0; - for _ in 0..repeat { - let now = std::time::Instant::now(); - track += query_engine.run_sql(&sql_string); - total_time += now.elapsed(); - } - println!( - "time_datafusion_select_with_pred ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - track - ); -} +// let repeat = 100; +// let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); +// let mut track = 0; +// for _ in 0..repeat { +// let now = std::time::Instant::now(); +// track += query_engine.run_sql(&sql_string); +// total_time += now.elapsed(); +// } +// println!( +// "time_datafusion_select_with_pred ran {:?} in {:?} {:?} / call {:?}", +// repeat, +// total_time, +// total_time / repeat, +// track +// ); +// } // // SELECT env, method, host, counter, time diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index ee63e51201..d2686f7e5d 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1,6 +1,8 @@ use std::collections::BTreeSet; use std::convert::From; +use delorean_arrow::arrow; + use super::encoding; #[derive(Debug)] @@ -1331,21 +1333,20 @@ impl AggregatableByRange for &Column { } } -use arrow::array::{Float64Array, Int64Array, TimestampMicrosecondArray}; -impl From for Column { +impl From for Column { fn from(arr: arrow::array::Float64Array) -> Self { Self::Float(NumericColumn::from(arr)) } } -impl From for Column { - fn from(arr: TimestampMicrosecondArray) -> Self { +impl From for Column { + fn from(arr: arrow::array::TimestampMicrosecondArray) -> Self { Self::Integer(NumericColumn::from(arr)) } } -impl From for Column { - fn from(arr: Int64Array) -> Self { +impl From for Column { + fn from(arr: arrow::array::Int64Array) -> Self { Self::Integer(NumericColumn::from(arr)) } } @@ -1682,7 +1683,9 @@ where } } -use arrow::array::Array; +use delorean_arrow::arrow::array::Array; +use delorean_arrow::arrow::array::PrimitiveArrayOps; + impl From for NumericColumn { fn from(arr: arrow::array::Float64Array) -> Self { let len = arr.len(); diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 09467bea63..3b33ed5e8e 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -2,8 +2,9 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::iter; use std::mem::size_of; -use arrow::array::{Array, PrimitiveArray}; -use arrow::datatypes::ArrowNumericType; +use delorean_arrow::arrow::array::PrimitiveArrayOps; +use delorean_arrow::arrow::array::{Array, PrimitiveArray}; +use delorean_arrow::arrow::datatypes::ArrowNumericType; pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { type Item; diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 29bb987eac..ad32402145 100644 --- 
a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -5,7 +5,7 @@ pub mod encoding; pub mod segment; pub mod sorter; -use arrow::datatypes::SchemaRef; +use delorean_arrow::arrow::datatypes::SchemaRef; use segment::{Segment, Segments}; #[derive(Debug, Default)] diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index fdde84f430..f42a90e31d 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; use super::column; use super::column::{AggregateType, Column}; -use arrow::datatypes::SchemaRef; +use delorean_arrow::arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. const THREADS: usize = 16; @@ -1812,12 +1812,12 @@ pub struct GroupedAggregates<'a> { #[cfg(test)] mod test { - use arrow::datatypes::*; + use delorean_arrow::arrow::datatypes::*; #[test] fn segment_group_key_sorted() { let schema = super::Schema::with_sort_order( - arrow::datatypes::SchemaRef::new(Schema::new(vec![ + delorean_arrow::arrow::datatypes::SchemaRef::new(Schema::new(vec![ Field::new("env", DataType::Utf8, false), Field::new("role", DataType::Utf8, false), Field::new("path", DataType::Utf8, false), From 9eee0c2852ccf81cfedc4fa3bc65a8b12042b7ba Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 24 Sep 2020 17:11:14 +0100 Subject: [PATCH 66/73] refactor: make clippy happy --- Cargo.lock | 1 + delorean_ingest/src/lib.rs | 7 ++++--- delorean_mem_qe/Cargo.toml | 1 + delorean_mem_qe/benches/encoding.rs | 10 ++++------ delorean_mem_qe/src/bin/main.rs | 30 +++++++++++++---------------- delorean_mem_qe/src/column.rs | 8 ++++---- delorean_mem_qe/src/encoding.rs | 19 +++++++++--------- delorean_mem_qe/src/lib.rs | 1 + delorean_mem_qe/src/segment.rs | 12 +++++------- delorean_table/src/sorter.rs | 12 ++++-------- 10 files changed, 46 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6f96b0298..8cd8705170 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -765,6 +765,7 @@ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ "chrono", + "criterion", "croaring", "crossbeam", "delorean_arrow", diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 9a0223e8b7..45a9374d9e 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -813,9 +813,9 @@ impl TSMFileConverter { println!("verifying order"); let values = packed_columns[12].i64_packer_mut().values(); let mut last = values[0]; - for i in 1..values.len() { - assert!(values[i] >= last); - last = values[i]; + for &v in values.iter().skip(1) { + assert!(v >= last); + last = v; } println!("finished sort in {:?}", now.elapsed()); @@ -823,6 +823,7 @@ impl TSMFileConverter { write_arrow_file(schema, packed_columns).unwrap(); println!("Done!"); + let _ = self.table_writer_source; // if packed_columns.len() < 13 { // continue; // } diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 5c0fbc3f37..c25677fbe5 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -22,3 +22,4 @@ human_format = "1.0.3" [dev-dependencies] +criterion = "0.3" \ No newline at end of file diff --git a/delorean_mem_qe/benches/encoding.rs b/delorean_mem_qe/benches/encoding.rs index 504ce64c8d..34ad9108ee 100644 --- a/delorean_mem_qe/benches/encoding.rs +++ b/delorean_mem_qe/benches/encoding.rs @@ -24,7 +24,7 @@ fn benchmark_row_ids( let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); let values = batch_size / cardinality; for i in 
0..cardinality { - input.push_additional(i.to_string().as_str(), values as u64); + input.push_additional(Some(i.to_string()), values as u64); } group.throughput(Throughput::Bytes(batch_size as u64)); @@ -35,9 +35,7 @@ fn benchmark_row_ids( b.iter(|| { // do work for i in 0..cardinality { - let ids = input - .row_ids(i.to_string().as_str()) - .collect::>(); + let _ = input.row_ids(Some(i.to_string())).collect::>(); } }); }, @@ -68,7 +66,7 @@ fn benchmark_row_ids_roaring( let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); let values = batch_size / cardinality; for i in 0..cardinality { - input.push_additional(i.to_string().as_str(), values as u64); + input.push_additional(Some(i.to_string()), values as u64); } group.throughput(Throughput::Bytes(batch_size as u64)); @@ -79,7 +77,7 @@ fn benchmark_row_ids_roaring( b.iter(|| { // do work for i in 0..cardinality { - let ids = input.row_ids_roaring(i.to_string().as_str()); + let _ = input.row_ids_eq_roaring(Some(i.to_string())); } }); }, diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index cda3563189..7b5e26c42b 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -54,18 +54,18 @@ fn main() { ); let store = Arc::new(store); - // time_select_with_pred(&store); + time_select_with_pred(&store); // time_datafusion_select_with_pred(store.clone()); - // time_first_host(&store); - // time_sum_range(&store); - // time_count_range(&store); - // time_group_single_with_pred(&store); - // time_group_by_multi_agg_count(&store); - // time_group_by_multi_agg_sorted_count(&store); - // time_window_agg_count(&store); - // time_tag_keys_with_pred(&store); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_sorted_count(&store); + time_window_agg_count(&store); + time_tag_keys_with_pred(&store); time_tag_values_with_pred(&store); - // time_group_by_different_columns(&store); + time_group_by_different_columns(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -110,7 +110,6 @@ fn build_store( ) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); - let mut i = 0; loop { let rb = reader.next_batch(); match rb { @@ -269,7 +268,7 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er datatypes::DataType::Boolean => { panic!("unsupported"); } - ref d @ _ => panic!("unsupported datatype: {:?}", d), + _ => panic!("unsupported datatype"), } } Ok(()) @@ -458,7 +457,7 @@ fn time_group_single_with_pred(store: &Store) { (1588834080000000, 1590044410000000), &[], &"env".to_string(), - &vec![("counter".to_string(), AggregateType::Count)], + &[("counter".to_string(), AggregateType::Count)], ); track += results.len(); } @@ -689,14 +688,12 @@ fn time_group_by_different_columns(store: &Store) { for strat in &strats { let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; let segments = store.segments(); - for i in 1..=cols.len() { for _ in 0..repeat { let now = std::time::Instant::now(); - let groups = segments.read_group_eq( + segments.read_group_eq( (1589000000000001, 1590044410000000), &[], cols[0..i].to_vec(), @@ -706,7 +703,6 @@ fn time_group_by_different_columns(store: &Store) { ); total_time += now.elapsed(); - total_max += groups.len(); } println!( 
"time_group_by_different_columns{:?} cols: {:?} ran {:?} in {:?} {:?}", diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index d2686f7e5d..d03eb0c6e1 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -981,7 +981,7 @@ impl Column { Column::String(c) => { if let Scalar::String(v) = value { if let Some(range) = c.meta.range() { - range.1 < v.to_string() + &range.1.as_str() < v } else { false } @@ -1020,7 +1020,7 @@ impl Column { Column::String(c) => { if let Scalar::String(v) = value { if let Some(range) = c.meta.range() { - range.0 > v.to_string() + &range.0.as_str() > v } else { false } @@ -1905,7 +1905,7 @@ pub mod metadata { pub fn size(&self) -> usize { // size of types for num_rows and range - let base_size = size_of::() + (2 * size_of::>()); + size_of::() + (2 * size_of::>()) // // TODO: figure out a way to specify that T must be able to describe its runtime size. @@ -1916,7 +1916,7 @@ pub mod metadata { // (None, Some(max)) => base_size + max.len(), // (Some(min), Some(max)) => base_size + min.len() + max.len(), // } - base_size + // base_size } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 3b33ed5e8e..a3c0e2b6a6 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -107,7 +107,7 @@ where /// supports null values then the values returned are undefined. /// /// encoded_values should not be called on nullable columns. - fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, _: &[usize]) -> Vec { todo!(); } @@ -117,7 +117,7 @@ where // TODO(edd): problem here is returning a slice because we need to own the // backing vector. - fn scan_from(&self, row_id: usize) -> &[Option] { + fn scan_from(&self, _: usize) -> &[Option] { unimplemented!("need to figure out returning a slice"); // let mut out = Vec::with_capacity(self.arr.len() - row_id); // for i in row_id..self.arr.len() { @@ -185,7 +185,7 @@ where count // if there are no non-null rows the result is 0 rather than NULL } - fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + fn count_by_ids(&self, _: &croaring::Bitmap) -> u64 { todo!() } @@ -213,8 +213,8 @@ where fn row_ids_single_cmp_roaring( &self, - wanted: &Self::Item, - order: std::cmp::Ordering, + _: &Self::Item, + _: std::cmp::Ordering, ) -> croaring::Bitmap { todo!() } @@ -224,7 +224,6 @@ where let mut found = false; //self.values[0]; let mut count = 0; - let mut i = 0; for i in 0..self.arr.len() { let next = &self.arr.value(i); if (self.arr.is_null(i) || next < from || next >= to) && found { @@ -385,7 +384,7 @@ where self.values.clone() // TODO(edd):perf probably can return reference to vec. } - fn scan_from(&self, row_id: usize) -> &[Option] { + fn scan_from(&self, _: usize) -> &[Option] { unimplemented!("this should probably take a destination vector or maybe a closure"); // &self.values[row_id..] 
} @@ -1311,9 +1310,9 @@ mod test { let east = Some("east".to_string()); let north = Some("north".to_string()); drle.push_additional(west.clone(), 3); - drle.push_additional(east.clone(), 2); - drle.push_additional(north.clone(), 4); - drle.push_additional(west.clone(), 3); + drle.push_additional(east, 2); + drle.push_additional(north, 4); + drle.push_additional(west, 3); let results = drle.encoded_values(&[0, 1, 4, 5]); diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index ad32402145..b625096f1f 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,4 +1,5 @@ #![deny(rust_2018_idioms)] +#![allow(clippy::type_complexity)] pub mod adapter; pub mod column; pub mod encoding; diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f42a90e31d..66a6299a74 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -131,8 +131,8 @@ impl Segment { } /// Determines if the segment contains a column with the provided name. - pub fn has_column(&self, name: &String) -> bool { - self.meta.column_names.contains(name) + pub fn has_column(&self, name: &str) -> bool { + self.meta.column_names.contains(&name.to_string()) } /// column returns the column with name @@ -445,9 +445,7 @@ impl Segment { // // TODO(edd): this is probably a bit of a perf suck. for (col_name, row_value) in &aggregate_row { - for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in - group_key_entry.iter_mut() - { + for &mut (cum_col_name, _, ref mut cum_agg_value) in group_key_entry.iter_mut() { if col_name != cum_col_name { continue; } @@ -995,8 +993,8 @@ impl Segment { &self, time_range: (i64, i64), predicates: &[(&str, Option>)], - group_column: &String, - aggregates: &Vec<(String, column::AggregateType)>, + group_column: &str, + aggregates: &[(String, column::AggregateType)], ) -> BTreeMap)>> { let mut grouped_results = BTreeMap::new(); diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index 7911b1b310..f242913db2 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -186,14 +186,10 @@ fn packers_sorted_asc(packers: &[Packers], len: usize, sort_by: &[usize]) -> boo } Packers::Integer(p) => { let vec = p.values(); - if vec[i - 1] < vec[i] { - continue 'row_wise; - } else if vec[i - 1] == vec[i] { - // try next column - continue; - } else { - // value is > so - return false; + match vec[i - 1].cmp(&vec[i]) { + Ordering::Less => continue 'row_wise, + Ordering::Equal => continue, + Ordering::Greater => return false, } } _ => continue, // don't compare on non-string / timestamp cols From 10511ae08b3ad87829b037d7d2716893e211db3b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 10:03:56 +0100 Subject: [PATCH 67/73] refactor: restore tsm ingest --- delorean_ingest/src/lib.rs | 132 +++---------------------------------- 1 file changed, 9 insertions(+), 123 deletions(-) diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 45a9374d9e..16bf2d7773 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -719,132 +719,18 @@ impl TSMFileConverter { match next_measurement { Some(mut table) => { - if table.name != "http_api_requests_total" { - continue; - } // convert (potentially merged) measurement.. 
- let (schema, mut packed_columns) = + let (schema, packed_columns) = Self::process_measurement_table(&mut block_reader, &mut table)?; + let mut table_writer = self + .table_writer_source + .next_writer(&schema) + .context(WriterCreation)?; - // println!("col def {:?}", schema.get_col_defs()); - // // cardinality - // for (i, col) in packed_columns.iter().enumerate() { - // println!("processing column {:?}", i); - // if let Packers::String(p) = col { - // let mut set: std::collections::BTreeSet<_> = BTreeSet::new(); - // for v in p.iter() { - // if let Some(v) = v { - // set.insert(String::from(v.as_utf8().unwrap())); - // } - // } - // println!("Cardinality for col is {:?}", set.len()); - // } - // } - // col def [ColumnDefinition { name: "env", index: 0, data_type: String }, - // ColumnDefinition { name: "handler", index: 1, data_type: String }, - // ColumnDefinition { name: "host", index: 2, data_type: String }, - // ColumnDefinition { name: "hostname", index: 3, data_type: String }, - // ColumnDefinition { name: "method", index: 4, data_type: String }, - // ColumnDefinition { name: "nodename", index: 5, data_type: String }, - // ColumnDefinition { name: "path", index: 6, data_type: String }, - // ColumnDefinition { name: "role", index: 7, data_type: String }, - // ColumnDefinition { name: "status", index: 8, data_type: String }, - // ColumnDefinition { name: "url", index: 9, data_type: String }, - // ColumnDefinition { name: "user_agent", index: 10, data_type: String }, - // ColumnDefinition { name: "counter", index: 11, data_type: Float }, - // ColumnDefinition { name: "time", index: 12, data_type: Timestamp }] - // processing column 0 - // Cardinality for col is 8 - // processing column 1 - // Cardinality for col is 8 - // processing column 2 - // Cardinality for col is 3005 - // processing column 3 - // Cardinality for col is 3005 - // processing column 4 - // Cardinality for col is 6 - // processing column 5 - // Cardinality for col is 148 - // processing column 6 - // Cardinality for col is 78 - // processing column 7 - // Cardinality for col is 14 - // processing column 8 - // Cardinality for col is 4 - // processing column 9 - // Cardinality for col is 6 - // processing column 10 - // Cardinality for col is 71 - // processing column 11 - // processing column 12 - // got all card - // println!("got all card"); - - // sort low to high == - // - // status 8 (4) - // method 4 (6) - // url 9 (6) - // env 0 (8) - // handler 1 (8) - // role 7 (14) - // user_agent 10 (71) - // path 6 (78) - // nodename 5 (148) - // host 2 (3005) - // hostname 3 (3005) - // - // time 12 - - if packed_columns.len() < 13 { - continue; - } - - println!("length of column s is {:?}", packed_columns.len()); - // let sort = [0, 7, 6, 12]; - // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; - // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; - let sort = [12]; - println!("Starting sort with {:?}", sort); - let now = std::time::Instant::now(); - - delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); - - println!("verifying order"); - let values = packed_columns[12].i64_packer_mut().values(); - let mut last = values[0]; - for &v in values.iter().skip(1) { - assert!(v >= last); - last = v; - } - println!("finished sort in {:?}", now.elapsed()); - - println!("Writing to arrow file!"); - write_arrow_file(schema, packed_columns).unwrap(); - println!("Done!"); - - let _ = self.table_writer_source; - // if packed_columns.len() < 13 { - // continue; - // } - // println!("length of column s is {:?}", 
packed_columns.len()); - // // let sort = [0, 7, 6, 12]; - // // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; - // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; - // println!("Starting sort with {:?}", sort); - // let now = std::time::Instant::now(); - // delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); - // println!("finished sort in {:?}", now.elapsed()); - - // let mut table_writer = self - // .table_writer_source - // .next_writer(&schema) - // .context(WriterCreation)?; - - // table_writer - // .write_batch(&packed_columns) - // .context(WriterCreation)?; - // table_writer.close().context(WriterCreation)?; + table_writer + .write_batch(&packed_columns) + .context(WriterCreation)?; + table_writer.close().context(WriterCreation)?; } None => break, } From d8fd1db0948b03740992a073319ee41544787d88 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 10:05:14 +0100 Subject: [PATCH 68/73] test: fix test --- delorean_mem_qe/src/encoding.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index a3c0e2b6a6..26bc940d80 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1055,10 +1055,7 @@ mod test { arr: super::PrimitiveArray::from(vec![Some(2.3), Some(44.56), None]), }; - // let encoded = col.all(); - // assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); - - let sum = col.sum_by_id_range(0, 1); + let sum = col.sum_by_id_range(0, 2); assert_eq!(sum, Some(46.86)); } From 6fcf0fd656bbe93744e27b4e0a82a8b79b221909 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 10:12:30 +0100 Subject: [PATCH 69/73] refactor: fix clippy --- delorean_ingest/src/lib.rs | 264 ++++++++++++++++++------------------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 16bf2d7773..ae680e5da1 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -11,7 +11,7 @@ use delorean_line_parser::{FieldValue, ParsedLine}; use delorean_table::{ - packers::{Packer, PackerChunker, Packers}, + packers::{Packer, Packers}, ByteArray, DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError, }; use delorean_table_schema::{DataType, Schema, SchemaBuilder}; @@ -508,153 +508,153 @@ fn pack_lines<'a>(schema: &Schema, lines: &[ParsedLine<'a>]) -> Vec { packers } -use arrow::array; -use arrow::datatypes; -use arrow::ipc::writer; -use arrow::record_batch; -use std::fs::File; -use std::sync::Arc; +// use arrow::array; +// use arrow::datatypes; +// use arrow::ipc::writer; +// use arrow::record_batch; +// use std::fs::File; +// use std::sync::Arc; -fn arrow_datatype(datatype: DataType) -> datatypes::DataType { - match datatype { - DataType::Float => datatypes::DataType::Float64, - DataType::Integer => datatypes::DataType::Int64, - DataType::String => datatypes::DataType::Utf8, - // DataType::String => datatypes::DataType::Dictionary( - // std::boxed::Box::new(datatypes::DataType::Int16), - // std::boxed::Box::new(datatypes::DataType::Utf8), - // ), - DataType::Boolean => datatypes::DataType::Boolean, - DataType::Timestamp => datatypes::DataType::Int64, - } -} +// fn arrow_datatype(datatype: DataType) -> datatypes::DataType { +// match datatype { +// DataType::Float => datatypes::DataType::Float64, +// DataType::Integer => datatypes::DataType::Int64, +// DataType::String => datatypes::DataType::Utf8, +// // DataType::String => datatypes::DataType::Dictionary( 
+// // std::boxed::Box::new(datatypes::DataType::Int16), +// // std::boxed::Box::new(datatypes::DataType::Utf8), +// // ), +// DataType::Boolean => datatypes::DataType::Boolean, +// DataType::Timestamp => datatypes::DataType::Int64, +// } +// } -fn write_arrow_file(parquet_schema: Schema, packers: Vec) -> Result<(), Error> { - let file = File::create("/tmp/http_api_requests_total.arrow").unwrap(); +// fn write_arrow_file(parquet_schema: Schema, packers: Vec) -> Result<(), Error> { +// let file = File::create("/tmp/http_api_requests_total.arrow").unwrap(); - let mut record_batch_fields: Vec = vec![]; - // no default() on Field... - record_batch_fields.resize( - parquet_schema.get_col_defs().len(), - datatypes::Field::new("foo", datatypes::DataType::Int64, false), - ); +// let mut record_batch_fields: Vec = vec![]; +// // no default() on Field... +// record_batch_fields.resize( +// parquet_schema.get_col_defs().len(), +// datatypes::Field::new("foo", datatypes::DataType::Int64, false), +// ); - for col_def in parquet_schema.get_col_defs() { - let nullable = col_def.data_type != DataType::Timestamp; - // if col_def.data_type == DataType::Timestamp { - // nullable = false; - // } else { - // nullable = true; - // } +// for col_def in parquet_schema.get_col_defs() { +// let nullable = col_def.data_type != DataType::Timestamp; +// // if col_def.data_type == DataType::Timestamp { +// // nullable = false; +// // } else { +// // nullable = true; +// // } - record_batch_fields[col_def.index as usize] = datatypes::Field::new( - col_def.name.as_str(), - arrow_datatype(col_def.data_type), - nullable, - ); - } - println!("{:?}", record_batch_fields); - println!("{:?}", parquet_schema.get_col_defs()); - let schema = datatypes::Schema::new(record_batch_fields); +// record_batch_fields[col_def.index as usize] = datatypes::Field::new( +// col_def.name.as_str(), +// arrow_datatype(col_def.data_type), +// nullable, +// ); +// } +// println!("{:?}", record_batch_fields); +// println!("{:?}", parquet_schema.get_col_defs()); +// let schema = datatypes::Schema::new(record_batch_fields); - let mut writer = writer::StreamWriter::try_new(file, &schema).unwrap(); +// let mut writer = writer::StreamWriter::try_new(file, &schema).unwrap(); - // let num_rows = packers[0].num_rows(); - let batch_size = 60_000; +// // let num_rows = packers[0].num_rows(); +// let batch_size = 60_000; - let mut packer_chunkers: Vec> = vec![]; - for packer in &packers { - packer_chunkers.push(packer.chunk_values(batch_size)); - } +// let mut packer_chunkers: Vec> = vec![]; +// for packer in &packers { +// packer_chunkers.push(packer.chunk_values(batch_size)); +// } - loop { - let mut chunked_packers: Vec = Vec::with_capacity(packers.len()); - for chunker in &mut packer_chunkers { - match chunker { - PackerChunker::Float(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::Float(Packer::from(chunk))); - } - } - PackerChunker::Integer(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::Integer(Packer::from(chunk))); - } - } - PackerChunker::String(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::String(Packer::from(chunk))); - } - } - PackerChunker::Boolean(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::Boolean(Packer::from(chunk))); - } - } - } - } +// loop { +// let mut chunked_packers: Vec = Vec::with_capacity(packers.len()); +// for chunker in &mut packer_chunkers { +// match chunker { +// PackerChunker::Float(c) => { +// if let Some(chunk) = 
c.next() { +// chunked_packers.push(Packers::Float(Packer::from(chunk))); +// } +// } +// PackerChunker::Integer(c) => { +// if let Some(chunk) = c.next() { +// chunked_packers.push(Packers::Integer(Packer::from(chunk))); +// } +// } +// PackerChunker::String(c) => { +// if let Some(chunk) = c.next() { +// chunked_packers.push(Packers::String(Packer::from(chunk))); +// } +// } +// PackerChunker::Boolean(c) => { +// if let Some(chunk) = c.next() { +// chunked_packers.push(Packers::Boolean(Packer::from(chunk))); +// } +// } +// } +// } - if chunked_packers.is_empty() { - break; - } +// if chunked_packers.is_empty() { +// break; +// } - // let sort = [0, 7, 6, 12]; - // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; - let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; - delorean_table::sorter::sort(&mut chunked_packers, &sort).unwrap(); +// // let sort = [0, 7, 6, 12]; +// // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; +// let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; +// delorean_table::sorter::sort(&mut chunked_packers, &sort).unwrap(); - println!( - "Writing {:?} packers with size: {:?}", - chunked_packers.len(), - chunked_packers[0].num_rows() - ); - write_arrow_batch(&mut writer, Arc::new(schema.clone()), chunked_packers); - } +// println!( +// "Writing {:?} packers with size: {:?}", +// chunked_packers.len(), +// chunked_packers[0].num_rows() +// ); +// write_arrow_batch(&mut writer, Arc::new(schema.clone()), chunked_packers); +// } - writer.finish().unwrap(); - Ok(()) -} +// writer.finish().unwrap(); +// Ok(()) +// } -fn write_arrow_batch( - w: &mut writer::StreamWriter, - schema: Arc, - packers: Vec, -) { - let mut record_batch_arrays: Vec = vec![]; +// fn write_arrow_batch( +// w: &mut writer::StreamWriter, +// schema: Arc, +// packers: Vec, +// ) { +// let mut record_batch_arrays: Vec = vec![]; - for packer in packers { - match packer { - Packers::Float(p) => { - record_batch_arrays.push(Arc::new(array::Float64Array::from(p.values().to_vec()))); - } - Packers::Integer(p) => { - record_batch_arrays.push(Arc::new(array::Int64Array::from(p.values().to_vec()))); - } - Packers::String(p) => { - let mut builder = array::StringBuilder::new(p.num_rows()); - for v in p.values() { - match v { - Some(v) => { - builder.append_value(v.as_utf8().unwrap()).unwrap(); - } - None => { - builder.append_null().unwrap(); - } - } - } - let array = builder.finish(); - record_batch_arrays.push(Arc::new(array)); - } - Packers::Boolean(p) => { - let array = array::BooleanArray::from(p.values().to_vec()); - record_batch_arrays.push(Arc::new(array)); - } - } - } +// for packer in packers { +// match packer { +// Packers::Float(p) => { +// record_batch_arrays.push(Arc::new(array::Float64Array::from(p.values().to_vec()))); +// } +// Packers::Integer(p) => { +// record_batch_arrays.push(Arc::new(array::Int64Array::from(p.values().to_vec()))); +// } +// Packers::String(p) => { +// let mut builder = array::StringBuilder::new(p.num_rows()); +// for v in p.values() { +// match v { +// Some(v) => { +// builder.append_value(v.as_utf8().unwrap()).unwrap(); +// } +// None => { +// builder.append_null().unwrap(); +// } +// } +// } +// let array = builder.finish(); +// record_batch_arrays.push(Arc::new(array)); +// } +// Packers::Boolean(p) => { +// let array = array::BooleanArray::from(p.values().to_vec()); +// record_batch_arrays.push(Arc::new(array)); +// } +// } +// } - let record_batch = record_batch::RecordBatch::try_new(schema, record_batch_arrays).unwrap(); - w.write(&record_batch).unwrap(); -} +// 
let record_batch = record_batch::RecordBatch::try_new(schema, record_batch_arrays).unwrap(); +// w.write(&record_batch).unwrap(); +// } /// Converts one or more TSM files into the delorean_table internal columnar /// data format and then passes that converted data to a `DeloreanTableWriter`. From 27e81fab2da089780a1d8980af8265b7394947d5 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 11:24:32 +0100 Subject: [PATCH 70/73] refactor: rework values/scalars --- delorean_mem_qe/src/bin/main.rs | 2 +- delorean_mem_qe/src/column.rs | 202 +++++++++++++++----------------- delorean_mem_qe/src/segment.rs | 96 +++++++-------- 3 files changed, 137 insertions(+), 163 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 7b5e26c42b..9c039e2fee 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -380,7 +380,7 @@ fn time_select_with_pred(store: &Store) { let columns = segments.read_filter_eq( (1590036110000000, 1590040770000000), - &[("env", Some(column::Scalar::String("prod01-eu-central-1")))], + &[("env", "prod01-eu-central-1")], vec![ "env".to_string(), "method".to_string(), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index d03eb0c6e1..25de6af868 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -15,23 +15,19 @@ pub enum Set<'a> { pub enum Value<'a> { Null, String(&'a str), - Scalar(Scalar<'a>), + Scalar(Scalar), } #[derive(Debug, PartialEq, PartialOrd, Clone)] -pub enum Scalar<'a> { - String(&'a str), +pub enum Scalar { Float(f64), Integer(i64), Unsigned32(u32), } -impl<'a> Scalar<'a> { +impl Scalar { pub fn reset(&mut self) { match self { - Scalar::String(_s) => { - panic!("not supported"); - } Scalar::Float(v) => { *v = 0.0; } @@ -44,7 +40,7 @@ impl<'a> Scalar<'a> { } } - pub fn add(&mut self, other: Scalar<'a>) { + pub fn add(&mut self, other: Scalar) { match self { Self::Float(v) => { if let Self::Float(other) = other { @@ -67,17 +63,14 @@ impl<'a> Scalar<'a> { panic!("invalid"); }; } - Self::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::Add<&Scalar<'a>> for &mut Scalar<'a> { - type Output = Scalar<'a>; +impl<'a> std::ops::Add<&Scalar> for &mut Scalar { + type Output = Scalar; - fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + fn add(self, _rhs: &Scalar) -> Self::Output { match *self { Scalar::Float(v) => { if let Scalar::Float(other) = _rhs { @@ -100,17 +93,14 @@ impl<'a> std::ops::Add<&Scalar<'a>> for &mut Scalar<'a> { panic!("invalid"); } } - Scalar::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { - type Output = Scalar<'a>; +impl<'a> std::ops::Add<&Scalar> for Scalar { + type Output = Scalar; - fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + fn add(self, _rhs: &Scalar) -> Self::Output { match self { Self::Float(v) => { if let Self::Float(other) = _rhs { @@ -133,15 +123,12 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); } } - Self::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::AddAssign<&Scalar<'a>> for &mut Scalar<'a> { - fn add_assign(&mut self, _rhs: &Scalar<'a>) { +impl<'a> std::ops::AddAssign<&Scalar> for &mut Scalar { + fn add_assign(&mut self, _rhs: &Scalar) { match self { Scalar::Float(v) => { if let Scalar::Float(other) = _rhs { @@ -164,15 +151,12 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for &mut Scalar<'a> { 
panic!("invalid"); }; } - Scalar::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { - fn add_assign(&mut self, _rhs: &Scalar<'a>) { +impl<'a> std::ops::AddAssign<&Scalar> for Scalar { + fn add_assign(&mut self, _rhs: &Scalar) { match self { Self::Float(v) => { if let Self::Float(other) = _rhs { @@ -195,19 +179,16 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); }; } - Self::String(_) => { - unreachable!("not possible to add strings"); - } } } } #[derive(Clone, Debug)] -pub enum Aggregate<'a> { +pub enum Aggregate { Count(u64), // Sum can be `None` is for example all values being aggregated are themselves // `None`. - Sum(Option>), + Sum(Option), } #[derive(Debug, Clone)] @@ -216,10 +197,10 @@ pub enum AggregateType { Sum, } -// impl<'a> std::ops::Add<&Option>> for Aggregate<'a> { -// type Output = Aggregate<'a>; +// impl<'a> std::ops::Add<&Option> for Aggregate { +// type Output = Aggregate; -// fn add(self, _rhs: &Option>) -> Self::Output { +// fn add(self, _rhs: &Option) -> Self::Output { // match self { // Self::Count(self_count) => match _rhs { // Some(other_scalar) => match other_scalar { @@ -252,10 +233,10 @@ pub enum AggregateType { // } // } -// impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { -// type Output = Aggregate<'a>; +// impl<'a> std::ops::Add<&Aggregate> for Aggregate { +// type Output = Aggregate; -// fn add(self, _rhs: &Aggregate<'a>) -> Self::Output { +// fn add(self, _rhs: &Aggregate) -> Self::Output { // match self { // Self::Count(self_count) => { // if let Self::Count(other) = _rhs { @@ -288,7 +269,7 @@ pub trait AggregatableByRange { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_>; + ) -> Aggregate; } /// A Vector is a materialised vector of values from a column. @@ -317,7 +298,7 @@ impl<'a> Vector<'a> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'a> { + ) -> Aggregate { match agg_type { AggregateType::Count => { Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id)) @@ -328,7 +309,7 @@ impl<'a> Vector<'a> { // Return the sum of values in the vector. NULL values are ignored. If there // are no non-null values in the vector being summed then None is returned. - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option> { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { match self { Self::NullString(_) => { panic!("can't sum strings...."); @@ -528,7 +509,7 @@ impl<'a> Vector<'a> { /// position `i` is NULL then `None` is returned. // // TODO - sort out - pub fn get_scalar(&self, i: usize) -> Option> { + pub fn get_scalar(&self, i: usize) -> Option { match self { Self::NullString(_) => panic!("unsupported get_scalar"), Self::NullFloat(v) => match v[i] { @@ -575,7 +556,7 @@ impl AggregatableByRange for &Vector<'_> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_> { + ) -> Aggregate { Vector::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } @@ -621,7 +602,7 @@ impl<'a> NullVectorIterator<'a> { } } impl<'a> Iterator for NullVectorIterator<'a> { - type Item = Option>; + type Item = Option; fn next(&mut self) -> Option { let curr_i = self.next_i; @@ -696,39 +677,39 @@ impl Column { /// Materialise the decoded value matching the provided logical /// row id. 
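The net effect of the Scalar/Value rework in this patch, condensed into a compilable sketch: Scalar keeps only the numeric, summable variants, while Value layers NULL and borrowed strings on top, so the "not possible to add strings" arms disappear by construction. The enum names mirror the diff; the add method is a simplified stand-in for the operator impls:

#[derive(Debug, Clone, Copy, PartialEq)]
enum Scalar {
    Float(f64),
    Integer(i64),
    Unsigned32(u32),
}

#[derive(Debug, Clone, PartialEq)]
enum Value<'a> {
    Null,
    String(&'a str), // borrowed, never participates in arithmetic
    Scalar(Scalar),
}

impl Scalar {
    fn add(&mut self, other: Scalar) {
        match (self, other) {
            (Scalar::Float(a), Scalar::Float(b)) => *a += b,
            (Scalar::Integer(a), Scalar::Integer(b)) => *a += b,
            (Scalar::Unsigned32(a), Scalar::Unsigned32(b)) => *a += b,
            _ => panic!("invalid: mismatched scalar types"),
        }
    }
}

With this split, the reworked value(row_id) below can return Value::Null, Value::String or Value::Scalar directly instead of forcing callers through an Option over a string-carrying Scalar.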
- pub fn value(&self, row_id: usize) -> Option> { + pub fn value<'a>(&'a self, row_id: usize) -> Value<'a> { match self { Column::String(c) => { if row_id >= self.num_rows() { - return None; + return Value::Null; } match c.value(row_id) { - Some(v) => Some(Scalar::String(v)), - None => None, + Some(v) => Value::String(v), + None => Value::Null, } } Column::Float(c) => { if row_id >= self.num_rows() { - return None; + return Value::Null; } let v = c.value(row_id); if let Some(v) = v { - return Some(Scalar::Float(v)); + return Value::Scalar(Scalar::Float(v)); } - None + Value::Null } Column::Integer(c) => { if row_id >= self.num_rows() { - return None; + return Value::Null; } let v = c.value(row_id); if let Some(v) = v { - return Some(Scalar::Integer(v)); + return Value::Scalar(Scalar::Integer(v)); } - None + Value::Null } } } @@ -949,37 +930,37 @@ impl Column { } } - pub fn maybe_contains(&self, value: &Scalar<'_>) -> bool { + pub fn maybe_contains(&self, value: &Value<'_>) -> bool { match self { Column::String(c) => { - if let Scalar::String(v) = value { + if let Value::String(v) = value { c.meta.maybe_contains_value(v.to_string()) + } else { + panic!("a String column cannot contain a non-string value"); + } + } + Column::Float(c) => { + if let Value::Scalar(Scalar::Float(v)) = value { + c.meta.maybe_contains_value(*v) } else { panic!("invalid value"); } } - Column::Float(c) => { - if let Scalar::Float(v) = value { - c.meta.maybe_contains_value(*v) - } else { - panic!("invalid value or unsupported null"); - } - } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { c.meta.maybe_contains_value(*v) } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); } } } } /// returns true if the column cannot contain - pub fn max_less_than(&self, value: &Scalar<'_>) -> bool { + pub fn max_less_than(&self, value: &Value<'_>) -> bool { match self { Column::String(c) => { - if let Scalar::String(v) = value { + if let Value::String(v) = value { if let Some(range) = c.meta.range() { &range.1.as_str() < v } else { @@ -990,7 +971,7 @@ impl Column { } } Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Value::Scalar(Scalar::Float(v)) = value { if let Some(range) = c.meta.range() { range.1 < *v } else { @@ -1001,7 +982,7 @@ impl Column { } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { if let Some(range) = c.meta.range() { range.1 < *v } else { @@ -1015,10 +996,10 @@ impl Column { } // TODO(edd): consolodate with max_less_than... Should just be single cmp function - pub fn min_greater_than(&self, value: &Scalar<'_>) -> bool { + pub fn min_greater_than(&self, value: &Value<'_>) -> bool { match self { Column::String(c) => { - if let Scalar::String(v) = value { + if let Value::String(v) = value { if let Some(range) = c.meta.range() { &range.0.as_str() > v } else { @@ -1029,7 +1010,7 @@ impl Column { } } Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Value::Scalar(Scalar::Float(v)) = value { if let Some(range) = c.meta.range() { range.0 > *v } else { @@ -1040,7 +1021,7 @@ impl Column { } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { if let Some(range) = c.meta.range() { range.0 > *v } else { @@ -1054,43 +1035,43 @@ impl Column { } /// Returns the minimum value contained within this column. 
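All three checks above reduce to comparisons against the column's (min, max) metadata, which is what lets a segment be skipped without scanning any rows. The same logic as a generic sketch, assuming a simplified Meta type in place of the real code's per-Column-variant dispatch and type-mismatch panics:

struct Meta<T> {
    range: Option<(T, T)>, // None when the column has no non-null values
}

impl<T: PartialOrd> Meta<T> {
    // The column may contain v only if v lies within [min, max].
    fn maybe_contains(&self, v: &T) -> bool {
        match &self.range {
            Some((min, max)) => min <= v && v <= max,
            None => false,
        }
    }

    // Every stored value is < v, so no row can satisfy col >= v.
    fn max_less_than(&self, v: &T) -> bool {
        matches!(&self.range, Some((_, max)) if max < v)
    }

    // Every stored value is > v, so no row can satisfy col <= v.
    fn min_greater_than(&self, v: &T) -> bool {
        matches!(&self.range, Some((min, _)) if min > v)
    }
}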
- pub fn min(&self) -> Option> { + pub fn min(&self) -> Value<'_> { match self { Column::String(c) => match c.meta.range() { - Some(range) => Some(Scalar::String(&range.0)), - None => None, + Some(range) => Value::String(&range.0), + None => Value::Null, }, Column::Float(c) => match c.meta.range() { - Some(range) => Some(Scalar::Float(range.0)), - None => None, + Some(range) => Value::Scalar(Scalar::Float(range.0)), + None => Value::Null, }, Column::Integer(c) => match c.meta.range() { - Some(range) => Some(Scalar::Integer(range.0)), - None => None, + Some(range) => Value::Scalar(Scalar::Integer(range.0)), + None => Value::Null, }, } } /// Returns the maximum value contained within this column. // FIXME(edd): Support NULL integers and floats - pub fn max(&self) -> Option> { + pub fn max(&self) -> Value<'_> { match self { Column::String(c) => match c.meta.range() { - Some(range) => Some(Scalar::String(&range.1)), - None => None, + Some(range) => Value::String(&range.1), + None => Value::Null, }, Column::Float(c) => match c.meta.range() { - Some(range) => Some(Scalar::Float(range.1)), - None => None, + Some(range) => Value::Scalar(Scalar::Float(range.1)), + None => Value::Null, }, Column::Integer(c) => match c.meta.range() { - Some(range) => Some(Scalar::Integer(range.1)), - None => None, + Some(range) => Value::Scalar(Scalar::Integer(range.1)), + None => Value::Null, }, } } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option> { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match c.sum_by_ids(row_ids) { @@ -1106,7 +1087,7 @@ impl Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_> { + ) -> Aggregate { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match agg_type { @@ -1132,11 +1113,10 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - pub fn row_ids_eq(&self, value: &Option>) -> Option { - let value = match value { - Some(v) => v, - None => return None, - }; + pub fn row_ids_eq(&self, value: &Value<'_>) -> Option { + if let Value::Null = value { + return None; // don't support "IS NULL" yet. + } if !self.maybe_contains(value) { return None; @@ -1144,14 +1124,14 @@ impl Column { self.row_ids(value, std::cmp::Ordering::Equal) } - pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { + pub fn row_ids_gt(&self, value: &Value<'_>) -> Option { if self.max_less_than(value) { return None; } self.row_ids(value, std::cmp::Ordering::Greater) } - pub fn row_ids_lt(&self, value: &Scalar<'_>) -> Option { + pub fn row_ids_lt(&self, value: &Value<'_>) -> Option { if self.min_greater_than(value) { return None; } @@ -1164,7 +1144,11 @@ impl Column { // or // // WHERE counter >= 102.2 AND counter < 2929.32 - pub fn row_ids_gte_lt(&self, low: &Scalar<'_>, high: &Scalar<'_>) -> Option { + pub fn row_ids_gte_lt(&self, low: &Value<'_>, high: &Value<'_>) -> Option { + if let (Value::Null, _) | (_, Value::Null) = (low, high) { + panic!("unsupported NULL value in range"); + } + match self { Column::String(_c) => { unimplemented!("not implemented yet"); @@ -1176,7 +1160,9 @@ impl Column { None => return None, }; - if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { + if let (Value::Scalar(Scalar::Float(low)), Value::Scalar(Scalar::Float(high))) = + (low, high) + { if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. 
// TODO: PERF - need to _not_ return a bitset rather than @@ -1204,7 +1190,9 @@ impl Column { None => return None, }; - if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { + if let (Value::Scalar(Scalar::Integer(low)), Value::Scalar(Scalar::Integer(high))) = + (low, high) + { if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than @@ -1229,28 +1217,28 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - fn row_ids(&self, value: &Scalar<'_>, order: std::cmp::Ordering) -> Option { + fn row_ids(&self, value: &Value<'_>, order: std::cmp::Ordering) -> Option { match self { Column::String(c) => { if order != std::cmp::Ordering::Equal { unimplemented!("> < not supported on strings yet"); } - if let Scalar::String(v) = value { + if let Value::String(v) = value { Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) } else { panic!("invalid value"); } } Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Value::Scalar(Scalar::Float(v)) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); @@ -1328,7 +1316,7 @@ impl AggregatableByRange for &Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_> { + ) -> Aggregate { Column::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 66a6299a74..3cc74aa03c 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -260,11 +260,11 @@ impl Segment { pub fn aggregate_by_group_with_hash<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate<'a>)>> { + ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. 
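As a reference for the strategy the comment above describes, here is a minimal sketch of hash-based grouping with running aggregates. The types are simplified stand-ins: `Vec<i64>` for the encoded group key and a toy `Agg` enum in place of `column::Aggregate`.

```rust
use std::collections::HashMap;

// Simplified stand-in for `column::Aggregate`.
#[derive(Debug)]
enum Agg {
    Count(u64),
    Sum(f64),
}

// Group rows by encoded key, maintaining running aggregates per group.
fn hash_group(rows: &[(Vec<i64>, f64)]) -> HashMap<Vec<i64>, Vec<Agg>> {
    let mut table: HashMap<Vec<i64>, Vec<Agg>> = HashMap::new();
    for (key, value) in rows {
        let aggs = table
            .entry(key.clone())
            .or_insert_with(|| vec![Agg::Count(0), Agg::Sum(0.0)]);
        for agg in aggs.iter_mut() {
            match agg {
                Agg::Count(c) => *c += 1,
                Agg::Sum(s) => *s += *value,
            }
        }
    }
    table
}
```

Note that the segment implementation below deliberately checks `contains_key` before inserting rather than using the entry API shown here, trading a second lookup for not having to clone the group key on every row.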
@@ -375,10 +375,10 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)>, + Vec<(&'a String, &'a AggregateType, column::Aggregate)>, > = BTreeMap::new(); - let mut aggregate_row: Vec<(&str, Option>)> = + let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) .take(aggregate_itrs.len()) .collect(); @@ -424,7 +424,7 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)> = + let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push(( @@ -497,7 +497,7 @@ impl Segment { pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -642,7 +642,7 @@ impl Segment { pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -870,11 +870,7 @@ impl Segment { vec![] } - pub fn sum_column( - &self, - name: &str, - row_ids: &mut croaring::Bitmap, - ) -> Option> { + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); } @@ -892,7 +888,7 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range @@ -904,27 +900,27 @@ impl Segment { // so don't need to intersect predicate results with time column. return self.filter_by_predicates_eq_no_time(predicates); } - self.filter_by_predicates_eq_time(time_range, predicates.to_vec()) + self.filter_by_predicates_eq_time(time_range, predicates) } fn filter_by_predicates_eq_time( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option>)>, + predicates: &[(&str, &str)], ) -> Option { // Get all row_ids matching the time range: // // time > time_range.0 AND time < time_range.1 let mut bm = self.columns[self.time_column_idx].row_ids_gte_lt( - &column::Scalar::Integer(time_range.0), - &column::Scalar::Integer(time_range.1), + &column::Value::Scalar(column::Scalar::Integer(time_range.0)), + &column::Value::Scalar(column::Scalar::Integer(time_range.1)), )?; log::debug!("time col bitmap contains {:?} values out of {:?} rows. requested range was {:?}, meta range is {:?}",bm.cardinality(),self.num_rows(), time_range, self.meta.time_range); // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(&col_pred_value) { + match c.row_ids_eq(&column::Value::String(col_pred_value)) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -949,7 +945,7 @@ impl Segment { // meta row_ids bitmap. 
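The intersection loop above condenses to the following sketch, assuming the `croaring` crate already used in this module (only the `and` and `is_empty` operations that appear in the surrounding code are relied on); `bm` starts out holding the row ids matching the time range.

```rust
// Intersect the time-range row ids with each predicate's row ids,
// bailing out as soon as the intersection is provably empty.
fn intersect_predicates(
    mut bm: croaring::Bitmap,
    per_predicate_row_ids: Vec<Option<croaring::Bitmap>>,
) -> Option<croaring::Bitmap> {
    for row_ids in per_predicate_row_ids {
        match row_ids {
            Some(ids) if !ids.is_empty() => {
                bm = bm.and(&ids);
                if bm.is_empty() {
                    return None; // intersection ran dry; no rows match
                }
            }
            _ => return None, // a predicate matched no rows at all
        }
    }
    Some(bm)
}
```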
fn filter_by_predicates_eq_no_time( &self, - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], ) -> Option { if predicates.is_empty() { // In this case there are no predicates provided and we have no time @@ -963,7 +959,7 @@ impl Segment { // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(col_pred_value) { + match c.row_ids_eq(&column::Value::String(col_pred_value)) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -992,10 +988,10 @@ impl Segment { pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_column: &str, aggregates: &[(String, column::AggregateType)], - ) -> BTreeMap)>> { + ) -> BTreeMap> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -1011,7 +1007,7 @@ impl Segment { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { // First calculate all of the aggregates for this grouped value - let mut aggs: Vec<((String, AggregateType), column::Aggregate<'_>)> = + let mut aggs: Vec<((String, AggregateType), column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg) in aggregates { @@ -1083,13 +1079,8 @@ impl Segment { todo!("fast path") } - let pred_vec = predicates - .iter() - .map(|p| (p.0, Some(column::Scalar::String(p.1)))) - .collect::>(); - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { return None; // no matching rows for predicate + time range @@ -1159,13 +1150,8 @@ impl Segment { todo!("fast path") } - let pred_vec = predicates - .iter() - .map(|p| (p.0, Some(column::Scalar::String(p.1)))) - .collect::>(); - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { return None; // no matching rows for predicate + time range @@ -1302,7 +1288,7 @@ impl<'a> Segments<'a> { pub fn read_filter_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], select_columns: Vec, ) -> BTreeMap> { let (min, max) = time_range; @@ -1338,12 +1324,12 @@ impl<'a> Segments<'a> { pub fn read_group_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, strategy: &GroupingStrategy, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); @@ -1388,12 +1374,12 @@ impl<'a> Segments<'a> { fn read_group_eq_hash( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1474,12 +1460,12 @@ impl<'a> 
Segments<'a> {
     fn read_group_eq_sort(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<column::Scalar<'a>>)],
+        predicates: &[(&str, &str)],
         mut group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         concurrent: bool,
-    ) -> BTreeMap<Vec<i64>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
+    ) -> BTreeMap<Vec<i64>, Vec<((String, column::AggregateType), column::Aggregate)>> {
         if window > 0 {
             // add time column to the group key
             group_columns.push("time".to_string());
@@ -1611,16 +1597,16 @@ impl<'a> Segments<'a> {
     }
 
     /// Returns the minimum value for a column in a set of segments.
-    pub fn column_min(&self, column_name: &str) -> Option<column::Scalar<'_>> {
+    pub fn column_min(&self, column_name: &str) -> column::Value<'_> {
         if self.segments.is_empty() {
-            return None;
+            return column::Value::Null;
         }
 
-        let mut min_min: Option<column::Scalar<'_>> = None;
+        let mut min_min = column::Value::Null;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let min = segment.columns[i].min();
-                if min_min.is_none() {
+                if let column::Value::Null = min_min {
                     min_min = min
                 } else if min_min > min {
                     min_min = min;
@@ -1632,17 +1618,17 @@ impl<'a> Segments<'a> {
     }
 
     /// Returns the maximum value for a column in a set of segments.
-    pub fn column_max(&self, column_name: &str) -> Option<column::Scalar<'_>> {
+    pub fn column_max(&self, column_name: &str) -> column::Value<'_> {
         if self.segments.is_empty() {
-            return None;
+            return column::Value::Null;
         }
 
-        let mut max_max: Option<column::Scalar<'_>> = None;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let max = segment.columns[i].max();
-                if max_max.is_none() {
-                    max_max = max
+                if let column::Value::Null = max_max {
+                    max_max = max;
                 } else if max_max < max {
                     max_max = max;
                 }
@@ -1660,7 +1646,7 @@ impl<'a> Segments<'a> {
     /// If the time column has multiple max time values then the result is arbitrary.
     ///
    /// TODO(edd): could return NULL value..
-    pub fn first(&self, column_name: &str) -> Option<(i64, Option<column::Scalar<'_>>, usize)> {
+    pub fn first(&self, column_name: &str) -> Option<(i64, column::Value<'_>, usize)> {
         // First let's find the segment with the earliest time range.
         // notice we order a < b on max time range.
         let segment = self
@@ -1689,10 +1675,10 @@ impl<'a> Segments<'a> {
     /// The last value is based on the time column, therefore the returned value
     /// may not be at the end of the column.
     ///
-    /// If the time column has multiple max time values then the result is abitrary.
+    /// If the time column has multiple max time values then the result is undefined.
     ///
     /// TODO(edd): could return NULL value..
-    pub fn last(&self, column_name: &str) -> Option<(i64, Option<column::Scalar<'_>>, usize)> {
+    pub fn last(&self, column_name: &str) -> Option<(i64, column::Value<'_>, usize)> {
         // First let's find the segment with the latest time range.
         // notice we order a > b on max time range.
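The Null-as-sentinel fold used by `column_min` and `column_max` above, reduced to a standalone sketch with a simplified `Value` type (the real enum also carries string and integer variants):

```rust
// Fold per-segment minimums into a global minimum, treating Null as
// "no value seen yet" rather than as a comparable value.
#[derive(Debug, PartialEq, PartialOrd)]
enum Value {
    Null,
    Scalar(f64),
}

fn fold_min(segment_mins: impl IntoIterator<Item = Value>) -> Value {
    let mut min_min = Value::Null;
    for min in segment_mins {
        if min == Value::Null {
            continue; // segment had no values for this column
        }
        if min_min == Value::Null || min < min_min {
            min_min = min;
        }
    }
    min_min
    // e.g. fold_min(vec![Value::Scalar(3.0), Value::Null, Value::Scalar(1.5)])
    //      == Value::Scalar(1.5)
}
```

Skipping Null per-segment minimums matters in this sketch: with the derived ordering, Null compares less than any scalar, so folding one in directly would clobber a genuine minimum.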
let segment = self @@ -1804,7 +1790,7 @@ pub enum GroupingStrategy { #[derive(Debug)] pub struct GroupedAggregates<'a> { pub group_key: Vec, - pub aggregates: Vec<(&'a String, column::Aggregate<'a>)>, + pub aggregates: Vec<(&'a String, column::Aggregate)>, } #[cfg(test)] From 635121c8013ac70a84db1e91bf1736f56bbd5d47 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 11:28:35 +0100 Subject: [PATCH 71/73] refactor: fix lifetime issue in hashgroup --- delorean_mem_qe/src/segment.rs | 74 ++++------------------------------ 1 file changed, 7 insertions(+), 67 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 3cc74aa03c..216ee2adf3 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -264,7 +264,7 @@ impl Segment { group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate)>> { + ) -> BTreeMap, Vec<(&'a String, column::Aggregate)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -318,7 +318,6 @@ impl Segment { panic!("need to handle no results for filtering/grouping..."); } } - // println!("grouped columns {:?}", group_column_encoded_values); // TODO(edd): we could do this with an iterator I expect. // @@ -373,10 +372,8 @@ impl Segment { .collect::>(); // hashMap is about 20% faster than BTreeMap in this case - let mut hash_table: BTreeMap< - Vec, - Vec<(&'a String, &'a AggregateType, column::Aggregate)>, - > = BTreeMap::new(); + let mut hash_table: BTreeMap, Vec<(&'a String, column::Aggregate)>> = + BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -424,12 +421,11 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate)> = + let mut agg_results: Vec<(&'a String, column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push(( col_name, - agg_type, match agg_type { AggregateType::Count => column::Aggregate::Count(0), AggregateType::Sum => column::Aggregate::Sum(None), @@ -445,7 +441,7 @@ impl Segment { // // TODO(edd): this is probably a bit of a perf suck. 
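The merge step that follows (matching each row value in `aggregate_row` against the cumulative aggregates) reduces to NULL-aware updates along these lines; as with the `Aggregate::Sum(None)` initialisation above, a sum stays `None` until the first non-null value arrives. A simplified `f64`-only sketch:

```rust
// Simplified stand-in for `column::Aggregate`: counts ignore NULLs
// (mirroring the count-by-range logic in the column code), and a sum
// remains None until the first non-null value is merged in.
#[derive(Debug)]
enum Aggregate {
    Count(u64),
    Sum(Option<f64>),
}

fn update(agg: &mut Aggregate, row_value: Option<f64>) {
    match agg {
        Aggregate::Count(c) => {
            if row_value.is_some() {
                *c += 1;
            }
        }
        Aggregate::Sum(sum) => {
            if let Some(v) = row_value {
                match sum {
                    Some(s) => *s += v,
                    None => *sum = Some(v),
                }
            }
        }
    }
}
```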
for (col_name, row_value) in &aggregate_row { - for &mut (cum_col_name, _, ref mut cum_agg_value) in group_key_entry.iter_mut() { + for &mut (cum_col_name, ref mut cum_agg_value) in group_key_entry.iter_mut() { if col_name != cum_col_name { continue; } @@ -465,33 +461,13 @@ impl Segment { } } } - // match cum_agg_value { - // Some(agg) => match agg { - // column::Aggregate::Count(_) => { - // *cum_agg_value = Some(agg + column::Aggregate::Count(Some(1))); - // } - // column::Aggregate::Sum(cum_sum) => { - // *cum_sum += row_value; - // } - // }, - // None => { - // *cum_agg_value = match agg_type { - // AggregateType::Count => Some(column::Aggregate::Count(Some(0))), - // AggregateType::Sum => { - // Some(column::Aggregate::Sum(row_value.clone())) - // } - // } - // } - // } } } processed_rows += 1; } - // println!("groups: {:?}", hash_table.len()); - log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - BTreeMap::new() - // hash_table + log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); + hash_table } pub fn aggregate_by_group_using_sort( @@ -597,18 +573,6 @@ impl Segment { let group_col_sort_order = &(0..group_columns.len()).collect::>(); super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - // let group_itrs = all_columns - // .iter() - // .take(group_columns.len()) // only use grouping columns - // .map(|vector| { - // if let column::Vector::Integer(v) = vector { - // v.iter() - // } else { - // panic!("don't support grouping on non-encoded values"); - // } - // }) - // .collect::>(); - let group_itrs = all_columns .iter() .take(group_columns.len()) @@ -706,17 +670,6 @@ impl Segment { }) .collect::>(); - // let group_itrs = group_column_encoded_values - // .iter() - // .map(|vector| { - // if let column::Vector::Integer(v) = vector { - // v.iter() - // } else { - // panic!("don't support grouping on non-encoded values"); - // } - // }) - // .collect::>(); - let mut aggregate_cols = Vec::with_capacity(aggregates.len()); for (column_name, agg_type) in aggregates { aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); @@ -741,12 +694,6 @@ impl Segment { .iter_mut() .enumerate() .map(|(i, itr)| { - // if i == group_itrs_len - 1 && window > 0 { - // // time column - apply window function - // return itr.next().unwrap() / window * window; - // } - // *itr.next().unwrap() - if i == group_itrs_len - 1 && window > 0 { // time column - apply window function if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { @@ -785,12 +732,6 @@ impl Segment { .zip(group_itrs.iter_mut()) .enumerate() { - // let next_v = if i == group_itrs_len - 1 && window > 0 { - // // time column - apply window function - // itr.next().unwrap() / window * window - // } else { - // *itr.next().unwrap() - // }; let next_v = if i == group_itrs_len - 1 && window > 0 { // time column - apply window function if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { @@ -864,7 +805,6 @@ impl Segment { aggregates: group_key_aggregates, }); - // println!("groups: {:?}", results.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, results); // results vec![] From 02f036914ca776cd872517869426fa8d5e134e32 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 11:47:11 +0100 Subject: [PATCH 72/73] refactor: vectors comprise scalars --- delorean_mem_qe/src/column.rs | 207 +++++++++++++++++----------------- 1 file changed, 104 insertions(+), 103 deletions(-) diff --git 
a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 25de6af868..a3445fe8c7 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -272,9 +272,9 @@ pub trait AggregatableByRange { ) -> Aggregate; } -/// A Vector is a materialised vector of values from a column. -pub enum Vector<'a> { - NullString(Vec<&'a Option>), +/// A Vector is a materialised vector of scalar values from a column. +pub enum Vector { + // NullString(Vec<&'a Option>), NullFloat(Vec>), NullInteger(Vec>), @@ -292,7 +292,7 @@ pub enum Vector<'a> { // } -impl<'a> Vector<'a> { +impl Vector { pub fn aggregate_by_id_range( &self, agg_type: &AggregateType, @@ -311,9 +311,9 @@ impl<'a> Vector<'a> { // are no non-null values in the vector being summed then None is returned. fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { match self { - Self::NullString(_) => { - panic!("can't sum strings...."); - } + // Self::NullString(_) => { + // panic!("can't sum strings...."); + // } Self::NullFloat(values) => { let mut res = 0.0; let mut found = false; // TODO(edd): check if this is faster than a match. @@ -382,15 +382,15 @@ impl<'a> Vector<'a> { // to the count. fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { match self { - Self::NullString(vec) => { - let mut count = 0; - for v in &vec[from_row_id..to_row_id] { - if v.is_some() { - count += 1; - } - } - count as u64 - } + // Self::NullString(vec) => { + // let mut count = 0; + // for v in &vec[from_row_id..to_row_id] { + // if v.is_some() { + // count += 1; + // } + // } + // count as u64 + // } Self::NullFloat(vec) => { let mut count = 0; for v in &vec[from_row_id..to_row_id] { @@ -423,13 +423,13 @@ impl<'a> Vector<'a> { pub fn extend(&mut self, other: Self) { match self { - Self::NullString(v) => { - if let Self::NullString(other) = other { - v.extend(other); - } else { - unreachable!("string can't be extended"); - } - } + // Self::NullString(v) => { + // if let Self::NullString(other) = other { + // v.extend(other); + // } else { + // unreachable!("string can't be extended"); + // } + // } Self::NullFloat(v) => { if let Self::NullFloat(other) = other { v.extend(other); @@ -474,7 +474,7 @@ impl<'a> Vector<'a> { pub fn len(&self) -> usize { match self { - Self::NullString(v) => v.len(), + // Self::NullString(v) => v.len(), Self::NullFloat(v) => v.len(), Self::NullInteger(v) => v.len(), Self::Float(v) => v.len(), @@ -485,33 +485,12 @@ impl<'a> Vector<'a> { /// Return the value within the vector at position `i`. If the value at /// position `i` is NULL then `None` is returned. - pub fn get(&self, i: usize) -> Value<'a> { + pub fn get(&self, i: usize) -> Option { match self { - Self::NullString(v) => match v[i] { - Some(v) => Value::String(v), - None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), - }, - Self::NullFloat(v) => match v[i] { - Some(v) => Value::Scalar(Scalar::Float(v)), - None => Value::Null, - }, - Self::NullInteger(v) => match v[i] { - Some(v) => Value::Scalar(Scalar::Integer(v)), - None => Value::Null, - }, - Self::Float(v) => Value::Scalar(Scalar::Float(v[i])), - Self::Integer(v) => Value::Scalar(Scalar::Integer(v[i])), - Self::Unsigned32(v) => Value::Scalar(Scalar::Unsigned32(v[i])), - } - } - - /// Return the value within the vector at position `i`. If the value at - /// position `i` is NULL then `None` is returned. 
- // - // TODO - sort out - pub fn get_scalar(&self, i: usize) -> Option { - match self { - Self::NullString(_) => panic!("unsupported get_scalar"), + // Self::NullString(v) => match v[i] { + // Some(v) => Value::String(v), + // None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), + // }, Self::NullFloat(v) => match v[i] { Some(v) => Some(Scalar::Float(v)), None => None, @@ -526,11 +505,32 @@ impl<'a> Vector<'a> { } } + /// Return the value within the vector at position `i`. If the value at + /// position `i` is NULL then `None` is returned. + // + // TODO - sort out + // pub fn get_scalar(&self, i: usize) -> Option { + // match self { + // Self::NullString(_) => panic!("unsupported get_scalar"), + // Self::NullFloat(v) => match v[i] { + // Some(v) => Some(Scalar::Float(v)), + // None => None, + // }, + // Self::NullInteger(v) => match v[i] { + // Some(v) => Some(Scalar::Integer(v)), + // None => None, + // }, + // Self::Float(v) => Some(Scalar::Float(v[i])), + // Self::Integer(v) => Some(Scalar::Integer(v[i])), + // Self::Unsigned32(v) => Some(Scalar::Unsigned32(v[i])), + // } + // } + pub fn swap(&mut self, a: usize, b: usize) { match self { - Self::NullString(v) => { - v.swap(a, b); - } + // Self::NullString(v) => { + // v.swap(a, b); + // } Self::NullFloat(v) => { v.swap(a, b); } @@ -550,7 +550,7 @@ impl<'a> Vector<'a> { } } -impl AggregatableByRange for &Vector<'_> { +impl AggregatableByRange for &Vector { fn aggregate_by_id_range( &self, agg_type: &AggregateType, @@ -561,18 +561,18 @@ impl AggregatableByRange for &Vector<'_> { } } -pub struct VectorIterator<'a> { - v: &'a Vector<'a>, +pub struct VectorIterator { + v: Vector, next_i: usize, } -impl<'a> VectorIterator<'a> { - pub fn new(v: &'a Vector<'a>) -> Self { +impl VectorIterator { + pub fn new(v: Vector) -> Self { Self { v, next_i: 0 } } } -impl<'a> Iterator for VectorIterator<'a> { - type Item = Value<'a>; +impl Iterator for VectorIterator { + type Item = Option; fn next(&mut self) -> Option { let curr_i = self.next_i; @@ -591,37 +591,36 @@ impl<'a> Iterator for VectorIterator<'a> { /// /// /// TODO - need to figure this out - currently only returns scalars -pub struct NullVectorIterator<'a> { - v: &'a Vector<'a>, - next_i: usize, -} +// pub struct NullVectorIterator { +// v: Vector, +// next_i: usize, +// } -impl<'a> NullVectorIterator<'a> { - pub fn new(v: &'a Vector<'a>) -> Self { - Self { v, next_i: 0 } - } -} -impl<'a> Iterator for NullVectorIterator<'a> { - type Item = Option; +// impl NullVectorIterator { +// pub fn new(v: Vector) -> Self { +// Self { v, next_i: 0 } +// } +// } +// impl Iterator for NullVectorIterator { +// type Item = Option; - fn next(&mut self) -> Option { - let curr_i = self.next_i; - self.next_i += 1; +// fn next(&mut self) -> Option { +// let curr_i = self.next_i; +// self.next_i += 1; - if curr_i == self.v.len() { - return None; - } - - Some(self.v.get_scalar(curr_i)) - } -} +// if curr_i == self.v.len() { +// return None; +// } +// Some(self.v.get_scalar(curr_i)) +// } +// } use chrono::prelude::*; -impl<'a> std::fmt::Display for Vector<'a> { +impl std::fmt::Display for Vector { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::NullString(v) => write!(f, "{:?}", v), + // Self::NullString(v) => write!(f, "{:?}", v), Self::NullFloat(v) => write!(f, "{:?}", v), Self::NullInteger(v) => { for x in v.iter() { @@ -723,14 +722,15 @@ impl Column { // which take up more memory and mean we can't do fast counts (since we need // to check each value is 
non-null). // - pub fn values(&self, row_ids: &[usize]) -> Vector<'_> { + pub fn values(&self, row_ids: &[usize]) -> Vector { match self { Column::String(c) => { - if row_ids.is_empty() { - return Vector::NullString(vec![]); - } + panic!("unsupported at the moment") + // if row_ids.is_empty() { + // return Vector::NullString(vec![]); + // } - Vector::NullString(c.values(row_ids)) + // Vector::NullString(c.values(row_ids)) } Column::Float(c) => { if row_ids.is_empty() { @@ -758,19 +758,20 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids within the bitmap - pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { + pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { match self { Column::String(c) => { - if row_ids.is_empty() { - return Vector::NullString(vec![]); - } + unreachable!("unsupported at the moment"); + // if row_ids.is_empty() { + // return Vector::NullString(vec![]); + // } - let row_id_vec = row_ids - .to_vec() - .iter() - .map(|v| *v as usize) - .collect::>(); - Vector::NullString(c.values(&row_id_vec)) + // let row_id_vec = row_ids + // .to_vec() + // .iter() + // .map(|v| *v as usize) + // .collect::>(); + // Vector::NullString(c.values(&row_id_vec)) } Column::Float(c) => { if row_ids.is_empty() { @@ -801,7 +802,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. - pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { + pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -840,7 +841,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. - pub fn encoded_values(&self, row_ids: &[usize]) -> Vector<'_> { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vector { match self { Column::String(c) => { if row_ids.is_empty() { @@ -872,7 +873,7 @@ impl Column { } /// Materialise all of the encoded values. 
- pub fn all_encoded_values(&self) -> Vector<'_> { + pub fn all_encoded_values(&self) -> Vector { match self { Column::String(c) => { let now = std::time::Instant::now(); @@ -908,7 +909,7 @@ impl Column { } /// materialise rows for each row_id - pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -924,7 +925,7 @@ impl Column { row_ids_vec[0] ); match self { - Column::String(c) => Vector::NullString(c.values(&row_ids_vec)), + Column::String(c) => panic!("unsupported"), //Vector::NullString(c.values(&row_ids_vec)), Column::Float(c) => Vector::NullFloat(c.values(&row_ids_vec)), Column::Integer(c) => Vector::NullInteger(c.values(&row_ids_vec)), } From 068fc8fc77655b3085d6a388dcc53a7b5d2d78b3 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 13:32:32 +0100 Subject: [PATCH 73/73] refactor: refactor lifetimes --- delorean_mem_qe/src/column.rs | 50 +++++++++++--- delorean_mem_qe/src/segment.rs | 123 ++++++++++++++++++--------------- delorean_mem_qe/src/sorter.rs | 12 ++-- 3 files changed, 115 insertions(+), 70 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index a3445fe8c7..449252d92d 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -18,6 +18,40 @@ pub enum Value<'a> { Scalar(Scalar), } +pub enum Values<'a> { + String(Vec<&'a Option>), + Float(Vec>), + Integer(Vec>), +} + +impl Values<'_> { + pub fn extend(&mut self, other: Self) { + match self { + Self::String(v) => { + if let Self::String(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Float(v) => { + if let Self::Float(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Integer(v) => { + if let Self::Integer(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + } + } +} + #[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Scalar { Float(f64), @@ -550,7 +584,7 @@ impl Vector { } } -impl AggregatableByRange for &Vector { +impl AggregatableByRange for Vector { fn aggregate_by_id_range( &self, agg_type: &AggregateType, @@ -676,7 +710,7 @@ impl Column { /// Materialise the decoded value matching the provided logical /// row id. 
- pub fn value<'a>(&'a self, row_id: usize) -> Value<'a> { + pub fn value(&'_ self, row_id: usize) -> Value<'_> { match self { Column::String(c) => { if row_id >= self.num_rows() { @@ -724,7 +758,7 @@ impl Column { // pub fn values(&self, row_ids: &[usize]) -> Vector { match self { - Column::String(c) => { + Column::String(_) => { panic!("unsupported at the moment") // if row_ids.is_empty() { // return Vector::NullString(vec![]); @@ -760,7 +794,7 @@ impl Column { /// row ids within the bitmap pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { match self { - Column::String(c) => { + Column::String(_) => { unreachable!("unsupported at the moment"); // if row_ids.is_empty() { // return Vector::NullString(vec![]); @@ -909,7 +943,7 @@ impl Column { } /// materialise rows for each row_id - pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Values<'_> { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -925,9 +959,9 @@ impl Column { row_ids_vec[0] ); match self { - Column::String(c) => panic!("unsupported"), //Vector::NullString(c.values(&row_ids_vec)), - Column::Float(c) => Vector::NullFloat(c.values(&row_ids_vec)), - Column::Integer(c) => Vector::NullInteger(c.values(&row_ids_vec)), + Column::String(c) => Values::String(c.values(&row_ids_vec)), + Column::Float(c) => Values::Float(c.values(&row_ids_vec)), + Column::Integer(c) => Values::Integer(c.values(&row_ids_vec)), } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 216ee2adf3..592b0bcc47 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -196,8 +196,8 @@ impl Segment { &self, row_ids: &croaring::Bitmap, columns: &[String], - ) -> BTreeMap> { - let mut rows: BTreeMap> = BTreeMap::new(); + ) -> BTreeMap> { + let mut rows = BTreeMap::new(); if row_ids.is_empty() { // nothing to return return rows; @@ -257,14 +257,14 @@ impl Segment { true } - pub fn aggregate_by_group_with_hash<'a>( + pub fn aggregate_by_group_with_hash( &self, time_range: (i64, i64), predicates: &[(&str, &str)], group_columns: &[String], - aggregates: &'a [(String, AggregateType)], + aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, column::Aggregate)>> { + ) -> BTreeMap, Vec<(&String, column::Aggregate)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -325,7 +325,7 @@ impl Segment { // aggregating on. let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { - let column_name: &'a String = column_name; + // let column_name: &String = column_name; if let Some(column) = self.column(&column_name) { let decoded_values = column.values(&filtered_row_ids_vec); @@ -349,7 +349,7 @@ impl Segment { // are grouping on. For columns that have no matching rows from the // filtering stage we will just emit None. let mut group_itrs = group_column_encoded_values - .iter() + .into_iter() .map(|vector| match vector { column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column @@ -361,19 +361,15 @@ impl Segment { // are aggregating on. For columns that have no matching rows from the // filtering stage we will just emit None. 
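For the multi-segment case, per-segment maps like the `BTreeMap<String, Values>` built by `read_rows_eq` above can be merged by extending each column's values in turn; a simplified sketch using plain vectors in place of the `Values` enum:

```rust
use std::collections::BTreeMap;

// Merge one segment's materialised columns into the overall result map,
// appending each column's values (simplified to the nullable-float case).
fn merge_segment(
    results: &mut BTreeMap<String, Vec<Option<f64>>>,
    segment_rows: BTreeMap<String, Vec<Option<f64>>>,
) {
    for (column_name, values) in segment_rows {
        results.entry(column_name).or_default().extend(values);
    }
}
```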
let mut aggregate_itrs = aggregate_column_decoded_values - .iter() + .into_iter() .map(|(col_name, values)| match values { - Some(values) => ( - col_name.as_str(), - Some(column::NullVectorIterator::new(values)), - ), + Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), None => (col_name.as_str(), None), }) .collect::>(); // hashMap is about 20% faster than BTreeMap in this case - let mut hash_table: BTreeMap, Vec<(&'a String, column::Aggregate)>> = - BTreeMap::new(); + let mut hash_table: BTreeMap, Vec<(&String, column::Aggregate)>> = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -388,16 +384,17 @@ impl Segment { group_itrs.iter_mut().enumerate().for_each(|(i, itr)| { if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + // + // TODO(edd): this is assuming non-null timestamps + if let Some(Some(column::Scalar::Integer(v))) = itr.next() { group_key[i] = v / window * window; } else { unreachable!( "something broken with grouping! Either processed None or wrong type" ); } - } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = - itr.next() - { + // The double Some is ok because encoded values are always non-null + } else if let Some(Some(column::Scalar::Unsigned32(v))) = itr.next() { group_key[i] = v as i64 } else { unreachable!( @@ -421,7 +418,7 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&'a String, column::Aggregate)> = + let mut agg_results: Vec<(&String, column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push(( @@ -467,7 +464,8 @@ impl Segment { } log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - hash_table + BTreeMap::new() + // hash_table } pub fn aggregate_by_group_using_sort( @@ -477,7 +475,7 @@ impl Segment { group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> Vec> { + ) -> Vec { log::debug!("aggregate_by_group_with_sort_unsorted called"); if window > 0 { @@ -573,8 +571,27 @@ impl Segment { let group_col_sort_order = &(0..group_columns.len()).collect::>(); super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - let group_itrs = all_columns - .iter() + let mut group_vecs = Vec::with_capacity(group_columns.len()); + let mut agg_vecs = Vec::with_capacity(aggregates.len()); + for (i, vec) in all_columns.into_iter().enumerate() { + if i < group_columns.len() { + group_vecs.push(vec); + } else { + agg_vecs.push(vec); + } + } + + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); + for (sorted_vector, agg) in agg_vecs + .into_iter() + .skip(group_columns.len()) + .zip(aggregates.iter()) + { + aggregate_cols.push((agg.0.clone(), agg.1.clone(), sorted_vector)); + } + + let group_itrs = group_vecs + .into_iter() .take(group_columns.len()) .map(|vector| match vector { column::Vector::Unsigned32(_) => { @@ -585,15 +602,6 @@ impl Segment { }) .collect::>(); - let mut aggregate_cols = Vec::with_capacity(aggregates.len()); - for (sorted_vector, (col_name, agg_type)) in all_columns - .iter() - .skip(group_columns.len()) - .zip(aggregates.iter()) - { - aggregate_cols.push((col_name, agg_type, sorted_vector)); - } - Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) } @@ -603,14 +611,14 @@ impl 
Segment { // `aggregate_by_group_using_stream` assumes that all columns being grouped // on are part of the overall segment sort, therefore it does no sorting or // hashing, and just streams aggregates out in order. - pub fn aggregate_by_group_using_stream<'a>( + pub fn aggregate_by_group_using_stream( &self, time_range: (i64, i64), predicates: &[(&str, &str)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> Vec> { + ) -> Vec { log::debug!("aggregate_by_group_using_stream called"); if window > 0 { @@ -662,7 +670,7 @@ impl Segment { } let group_itrs = group_column_encoded_values - .iter() + .into_iter() .map(|vector| match vector { column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column @@ -672,7 +680,11 @@ impl Segment { let mut aggregate_cols = Vec::with_capacity(aggregates.len()); for (column_name, agg_type) in aggregates { - aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); + aggregate_cols.push(( + column_name.clone(), + agg_type.clone(), + self.column(&column_name).unwrap(), + )); } Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) @@ -681,12 +693,12 @@ impl Segment { // Once the rows necessary for doing a (windowed) grouped aggregate are // available and appropriately sorted this method will build a result set of // aggregates in a streaming way. - pub fn stream_grouped_aggregates<'a>( - mut group_itrs: Vec>, - aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, + pub fn stream_grouped_aggregates( + mut group_itrs: Vec, + aggregate_cols: Vec<(String, AggregateType, impl column::AggregatableByRange)>, total_rows: usize, window: i64, - ) -> Vec> { + ) -> Vec { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. let group_itrs_len = &group_itrs.len(); @@ -696,16 +708,17 @@ impl Segment { .map(|(i, itr)| { if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + // + // TODO(edd): this is assuming non-null time column + if let Some(Some(column::Scalar::Integer(v))) = itr.next() { v / window * window } else { unreachable!( "something broken with grouping! Either processed None or wrong type" ); } - } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = - itr.next() - { + // the double some should be ok as encoded values can never be None + } else if let Some(Some(column::Scalar::Unsigned32(v))) = itr.next() { v as i64 } else { unreachable!( @@ -734,16 +747,14 @@ impl Segment { { let next_v = if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + if let Some(Some(column::Scalar::Integer(v))) = itr.next() { v / window * window } else { unreachable!( "something broken with grouping! 
Either processed None or wrong type" ); } - } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = - itr.next() - { + } else if let Some(Some(column::Scalar::Unsigned32(v))) = itr.next() { v as i64 } else { unreachable!( @@ -767,7 +778,7 @@ impl Segment { group_key_start_row_id + group_size, ); - group_key_aggregates.push((*name, agg_result)); + group_key_aggregates.push((name.clone(), agg_result)); } results.push(GroupedAggregates { @@ -797,7 +808,7 @@ impl Segment { ); // TODO(edd): fix weirdness - group_key_aggregates.push((*name, agg_result)); + group_key_aggregates.push((name.clone(), agg_result)); } results.push(GroupedAggregates { @@ -806,8 +817,8 @@ impl Segment { }); log::debug!("({:?} rows processed) {:?}", processed_rows, results); - // results - vec![] + // vec![] + results } pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { @@ -1230,13 +1241,13 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, &str)], select_columns: Vec, - ) -> BTreeMap> { + ) -> BTreeMap> { let (min, max) = time_range; if max <= min { panic!("max <= min"); } - let mut columns: BTreeMap> = BTreeMap::new(); + let mut columns: BTreeMap> = BTreeMap::new(); for segment in &self.segments { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range @@ -1728,9 +1739,9 @@ pub enum GroupingStrategy { } #[derive(Debug)] -pub struct GroupedAggregates<'a> { +pub struct GroupedAggregates { pub group_key: Vec, - pub aggregates: Vec<(&'a String, column::Aggregate)>, + pub aggregates: Vec<(String, column::Aggregate)>, } #[cfg(test)] diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index 7dd7d6ced3..0e0a607c4d 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -43,7 +43,7 @@ const SORTED_CHECK_SIZE: usize = 1000; /// /// All chosen columns will be sorted in ascending order; the sort is *not* /// stable. 
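One more note before the sorter: the `v / window * window` expression used throughout the grouping code above truncates a timestamp to the start of its window. A standalone sketch, with one caveat:

```rust
// Truncate a timestamp to the start of its window. Caveat: Rust's
// integer division truncates toward zero, so pre-epoch (negative)
// timestamps would need floor division instead.
fn window_start(ts: i64, window: i64) -> i64 {
    ts / window * window
}

#[test]
fn window_start_buckets() {
    assert_eq!(window_start(119, 60), 60);
    assert_eq!(window_start(1_000_000_129, 60), 1_000_000_080);
}
```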
-pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(), Error> {
+pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Error> {
     if vectors.is_empty() || sort_by.is_empty() {
         return Ok(());
     }
@@ -84,7 +84,7 @@ pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(),
     Ok(())
 }
 
-fn quicksort_by(vectors: &mut [column::Vector<'_>], range: Range<usize>, sort_by: &[usize]) {
+fn quicksort_by(vectors: &mut [column::Vector], range: Range<usize>, sort_by: &[usize]) {
     if range.start >= range.end {
         return;
     }
@@ -94,7 +94,7 @@ fn quicksort_by(vectors: &mut [column::Vector<'_>], range: Range<usize>, sort_by
     quicksort_by(vectors, pivot + 1..range.end, sort_by);
 }
 
-fn partition(vectors: &mut [column::Vector<'_>], range: &Range<usize>, sort_by: &[usize]) -> usize {
+fn partition(vectors: &mut [column::Vector], range: &Range<usize>, sort_by: &[usize]) -> usize {
     let pivot = (range.start + range.end) / 2;
     let (lo, hi) = (range.start, range.end);
     if cmp(vectors, pivot as usize, lo as usize, sort_by) == Ordering::Less {
@@ -130,7 +130,7 @@ fn partition(vectors: &mut [column::Vector<'_>], range: &Range<usize>, sort_by:
     }
 }
 
-fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
+fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
     for &idx in sort_by {
         match &vectors[idx] {
             column::Vector::Unsigned32(p) => {
@@ -154,7 +154,7 @@ fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) ->
 }
 
 #[allow(dead_code)]
-fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usize]) -> bool {
+fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool {
     'row_wise: for i in 1..len {
         for &idx in sort_by {
             match &vectors[idx] {
@@ -199,7 +199,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi
 }
 
 // Swap the same pair of elements in each packer column
-fn swap(vectors: &mut [column::Vector<'_>], a: usize, b: usize) {
+fn swap(vectors: &mut [column::Vector], a: usize, b: usize) {
     for p in vectors {
         p.swap(a, b);
     }
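Stripped of the `column::Vector` enum dispatch, the `cmp` routine above is a plain lexicographic row comparison across parallel column vectors, visiting columns in sort-key order and falling through on ties; a simplified `i64`-only sketch:

```rust
use std::cmp::Ordering;

// Compare rows `a` and `b` held across parallel column vectors,
// column by column in sort-key order.
fn cmp_rows(columns: &[Vec<i64>], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
    for &idx in sort_by {
        match columns[idx][a].cmp(&columns[idx][b]) {
            Ordering::Equal => continue, // tie: fall through to next key
            other => return other,
        }
    }
    Ordering::Equal
}
```

This is what lets `sort` order whole rows while the data stays columnar: `swap` then applies the same index exchange to every column so rows remain aligned.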