From 6d793218377a36bada47d5004c517ec442be33ee Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Fri, 31 Jul 2020 11:28:11 +0100
Subject: [PATCH 01/73] feat: add multi-column Quicksort for `[Packers]`

---
 delorean_table/src/packers.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/delorean_table/src/packers.rs b/delorean_table/src/packers.rs
index a0687afe5c..4d73da810f 100644
--- a/delorean_table/src/packers.rs
+++ b/delorean_table/src/packers.rs
@@ -215,7 +215,7 @@ where
 impl<T> Packer<T>
 where
-    T: Default + Clone,
+    T: Default + Clone + std::fmt::Debug,
 {
     pub fn new() -> Self {
         Self { values: Vec::new() }
@@ -333,7 +333,7 @@ where
 impl<'a, T> Iterator for PackerIterator<'a, T>
 where
-    T: Default + Clone,
+    T: Default + Clone + std::fmt::Debug,
 {
     type Item = Option<&'a T>;
@@ -365,7 +365,7 @@ where
 // `Packer` value, e.g., `Packer<i64>`.
 impl<T> std::convert::From<Vec<Option<T>>> for Packer<T>
 where
-    T: Default + Clone,
+    T: Default + Clone + std::fmt::Debug,
 {
     fn from(values: Vec<Option<T>>) -> Self {
         let mut packer = Self::new();

From 238e9895551d7721126a91b4812dbb2440f56188 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Wed, 5 Aug 2020 10:05:26 +0100
Subject: [PATCH 02/73] feat: add ability to emit Packer values in chunks

---
 delorean_table/src/packers.rs | 44 +++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/delorean_table/src/packers.rs b/delorean_table/src/packers.rs
index 4d73da810f..fc1223b30d 100644
--- a/delorean_table/src/packers.rs
+++ b/delorean_table/src/packers.rs
@@ -7,6 +7,7 @@
 // soon... We'll see how long that actually takes...
 use core::iter::Iterator;
 use std::iter;
+use std::slice::Chunks;

 use delorean_arrow::parquet::data_type::ByteArray;
 use std::default::Default;
@@ -44,7 +45,16 @@ macro_rules! typed_packer_accessors {
     };
 }

-impl Packers {
+impl<'a> Packers {
+    pub fn chunk_values(&self, chunk_size: usize) -> PackerChunker<'_> {
+        match self {
+            Self::Float(p) => PackerChunker::Float(p.values.chunks(chunk_size)),
+            Self::Integer(p) => PackerChunker::Integer(p.values.chunks(chunk_size)),
+            Self::String(p) => PackerChunker::String(p.values.chunks(chunk_size)),
+            Self::Boolean(p) => PackerChunker::Boolean(p.values.chunks(chunk_size)),
+        }
+    }
+
     /// Create a String Packers with repeated values.
     pub fn from_elem_str(v: &str, n: usize) -> Self {
         Self::String(Packer::from(vec![ByteArray::from(v); n]))
@@ -205,6 +215,15 @@ impl std::convert::From>>> for Packers {
     }
 }

+/// PackerChunker represents chunkable Packer variants.
+#[derive(Debug)]
+pub enum PackerChunker<'a> {
+    Float(Chunks<'a, Option<f64>>),
+    Integer(Chunks<'a, Option<i64>>),
+    String(Chunks<'a, Option<ByteArray>>),
+    Boolean(Chunks<'a, Option<bool>>),
+}
+
 #[derive(Debug, Default, PartialEq)]
 pub struct Packer<T>
 where
@@ -259,7 +278,13 @@ where
         &self.values
     }

-    /// returns a binary vector indicating which indexes have null values.
+    /// Returns an iterator that emits `chunk_size` values from the Packer until
+    /// all values are returned.
+    pub fn chunk_values(&self, chunk_size: usize) -> std::slice::Chunks<'_, Option<T>> {
+        self.values.chunks(chunk_size)
+    }
+
+    /// Returns a binary vector indicating which indexes have null values.
    pub fn def_levels(&self) -> Vec<i16> {
         self.values
             .iter()
@@ -376,6 +401,21 @@ where
     }
 }

+// Convert `&[]`, e.g., `&[Option<i64>]` into the appropriate
+// `Packer` value, e.g., `Packer<i64>`.
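+//
+// A small usage sketch (hypothetical values):
+//
+//     let packer = Packer::<i64>::from(&[Some(1), None, Some(3)][..]);
+//     assert_eq!(packer.num_rows(), 3);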
+impl<T> std::convert::From<&[Option<T>]> for Packer<T>
+where
+    T: Default + Clone + std::fmt::Debug,
+{
+    fn from(values: &[Option<T>]) -> Self {
+        let mut packer = Self::new();
+        for v in values {
+            packer.push_option(v.clone());
+        }
+        packer
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;

From cb3e948ca0d3d922fe1a79ed9bc1532a83696211 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Wed, 5 Aug 2020 10:06:35 +0100
Subject: [PATCH 03/73] feat: TO REMOVE - TSM -> Arrow

---
 delorean_ingest/Cargo.toml |   2 +-
 delorean_ingest/src/lib.rs | 281 +++++++++++++++++++++++++++++++++++--
 2 files changed, 272 insertions(+), 11 deletions(-)

diff --git a/delorean_ingest/Cargo.toml b/delorean_ingest/Cargo.toml
index b1a26ea873..e582e8848b 100644
--- a/delorean_ingest/Cargo.toml
+++ b/delorean_ingest/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2018"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-
+arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" }
 snafu = "0.6.2"
 env_logger = "0.7.1"
 log = "0.4.8"

diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs
index 9f8ff4cfcc..9a0223e8b7 100644
--- a/delorean_ingest/src/lib.rs
+++ b/delorean_ingest/src/lib.rs
@@ -11,7 +11,7 @@
 use delorean_line_parser::{FieldValue, ParsedLine};
 use delorean_table::{
-    packers::{Packer, Packers},
+    packers::{Packer, PackerChunker, Packers},
     ByteArray, DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError,
 };
 use delorean_table_schema::{DataType, Schema, SchemaBuilder};
@@ -508,6 +508,154 @@ fn pack_lines<'a>(schema: &Schema, lines: &[ParsedLine<'a>]) -> Vec<Packers> {
     packers
 }

+use arrow::array;
+use arrow::datatypes;
+use arrow::ipc::writer;
+use arrow::record_batch;
+use std::fs::File;
+use std::sync::Arc;
+
+fn arrow_datatype(datatype: DataType) -> datatypes::DataType {
+    match datatype {
+        DataType::Float => datatypes::DataType::Float64,
+        DataType::Integer => datatypes::DataType::Int64,
+        DataType::String => datatypes::DataType::Utf8,
+        // DataType::String => datatypes::DataType::Dictionary(
+        //     std::boxed::Box::new(datatypes::DataType::Int16),
+        //     std::boxed::Box::new(datatypes::DataType::Utf8),
+        // ),
+        DataType::Boolean => datatypes::DataType::Boolean,
+        DataType::Timestamp => datatypes::DataType::Int64,
+    }
+}
+
+fn write_arrow_file(parquet_schema: Schema, packers: Vec<Packers>) -> Result<(), Error> {
+    let file = File::create("/tmp/http_api_requests_total.arrow").unwrap();
+
+    let mut record_batch_fields: Vec<datatypes::Field> = vec![];
+    // no default() on Field...
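+    // (`datatypes::Field` has no `Default` impl, so the vector is grown with a
+    // throwaway placeholder ("foo", Int64) and each slot is then overwritten
+    // by column index in the loop below; no placeholder survives.)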
+ record_batch_fields.resize( + parquet_schema.get_col_defs().len(), + datatypes::Field::new("foo", datatypes::DataType::Int64, false), + ); + + for col_def in parquet_schema.get_col_defs() { + let nullable = col_def.data_type != DataType::Timestamp; + // if col_def.data_type == DataType::Timestamp { + // nullable = false; + // } else { + // nullable = true; + // } + + record_batch_fields[col_def.index as usize] = datatypes::Field::new( + col_def.name.as_str(), + arrow_datatype(col_def.data_type), + nullable, + ); + } + println!("{:?}", record_batch_fields); + println!("{:?}", parquet_schema.get_col_defs()); + let schema = datatypes::Schema::new(record_batch_fields); + + let mut writer = writer::StreamWriter::try_new(file, &schema).unwrap(); + + // let num_rows = packers[0].num_rows(); + let batch_size = 60_000; + + let mut packer_chunkers: Vec> = vec![]; + for packer in &packers { + packer_chunkers.push(packer.chunk_values(batch_size)); + } + + loop { + let mut chunked_packers: Vec = Vec::with_capacity(packers.len()); + for chunker in &mut packer_chunkers { + match chunker { + PackerChunker::Float(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::Float(Packer::from(chunk))); + } + } + PackerChunker::Integer(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::Integer(Packer::from(chunk))); + } + } + PackerChunker::String(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::String(Packer::from(chunk))); + } + } + PackerChunker::Boolean(c) => { + if let Some(chunk) = c.next() { + chunked_packers.push(Packers::Boolean(Packer::from(chunk))); + } + } + } + } + + if chunked_packers.is_empty() { + break; + } + + // let sort = [0, 7, 6, 12]; + // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; + let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; + delorean_table::sorter::sort(&mut chunked_packers, &sort).unwrap(); + + println!( + "Writing {:?} packers with size: {:?}", + chunked_packers.len(), + chunked_packers[0].num_rows() + ); + write_arrow_batch(&mut writer, Arc::new(schema.clone()), chunked_packers); + } + + writer.finish().unwrap(); + Ok(()) +} + +fn write_arrow_batch( + w: &mut writer::StreamWriter, + schema: Arc, + packers: Vec, +) { + let mut record_batch_arrays: Vec = vec![]; + + for packer in packers { + match packer { + Packers::Float(p) => { + record_batch_arrays.push(Arc::new(array::Float64Array::from(p.values().to_vec()))); + } + Packers::Integer(p) => { + record_batch_arrays.push(Arc::new(array::Int64Array::from(p.values().to_vec()))); + } + Packers::String(p) => { + let mut builder = array::StringBuilder::new(p.num_rows()); + for v in p.values() { + match v { + Some(v) => { + builder.append_value(v.as_utf8().unwrap()).unwrap(); + } + None => { + builder.append_null().unwrap(); + } + } + } + let array = builder.finish(); + record_batch_arrays.push(Arc::new(array)); + } + Packers::Boolean(p) => { + let array = array::BooleanArray::from(p.values().to_vec()); + record_batch_arrays.push(Arc::new(array)); + } + } + } + + let record_batch = record_batch::RecordBatch::try_new(schema, record_batch_arrays).unwrap(); + w.write(&record_batch).unwrap(); +} + /// Converts one or more TSM files into the delorean_table internal columnar /// data format and then passes that converted data to a `DeloreanTableWriter`. 
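// A reading counterpart for the stream written by `write_arrow_file`; a
// sketch using this Arrow revision's IPC API, with the path hard-coded above:
//
//     let f = std::fs::File::open("/tmp/http_api_requests_total.arrow").unwrap();
//     let mut reader = arrow::ipc::reader::StreamReader::try_new(f).unwrap();
//     while let Some(batch) = reader.next_batch().unwrap() {
//         // each RecordBatch holds at most 60_000 rows, sorted by the sort key
//     }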
pub struct TSMFileConverter { @@ -571,18 +719,131 @@ impl TSMFileConverter { match next_measurement { Some(mut table) => { + if table.name != "http_api_requests_total" { + continue; + } // convert (potentially merged) measurement.. - let (schema, packed_columns) = + let (schema, mut packed_columns) = Self::process_measurement_table(&mut block_reader, &mut table)?; - let mut table_writer = self - .table_writer_source - .next_writer(&schema) - .context(WriterCreation)?; - table_writer - .write_batch(&packed_columns) - .context(WriterCreation)?; - table_writer.close().context(WriterCreation)?; + // println!("col def {:?}", schema.get_col_defs()); + // // cardinality + // for (i, col) in packed_columns.iter().enumerate() { + // println!("processing column {:?}", i); + // if let Packers::String(p) = col { + // let mut set: std::collections::BTreeSet<_> = BTreeSet::new(); + // for v in p.iter() { + // if let Some(v) = v { + // set.insert(String::from(v.as_utf8().unwrap())); + // } + // } + // println!("Cardinality for col is {:?}", set.len()); + // } + // } + // col def [ColumnDefinition { name: "env", index: 0, data_type: String }, + // ColumnDefinition { name: "handler", index: 1, data_type: String }, + // ColumnDefinition { name: "host", index: 2, data_type: String }, + // ColumnDefinition { name: "hostname", index: 3, data_type: String }, + // ColumnDefinition { name: "method", index: 4, data_type: String }, + // ColumnDefinition { name: "nodename", index: 5, data_type: String }, + // ColumnDefinition { name: "path", index: 6, data_type: String }, + // ColumnDefinition { name: "role", index: 7, data_type: String }, + // ColumnDefinition { name: "status", index: 8, data_type: String }, + // ColumnDefinition { name: "url", index: 9, data_type: String }, + // ColumnDefinition { name: "user_agent", index: 10, data_type: String }, + // ColumnDefinition { name: "counter", index: 11, data_type: Float }, + // ColumnDefinition { name: "time", index: 12, data_type: Timestamp }] + // processing column 0 + // Cardinality for col is 8 + // processing column 1 + // Cardinality for col is 8 + // processing column 2 + // Cardinality for col is 3005 + // processing column 3 + // Cardinality for col is 3005 + // processing column 4 + // Cardinality for col is 6 + // processing column 5 + // Cardinality for col is 148 + // processing column 6 + // Cardinality for col is 78 + // processing column 7 + // Cardinality for col is 14 + // processing column 8 + // Cardinality for col is 4 + // processing column 9 + // Cardinality for col is 6 + // processing column 10 + // Cardinality for col is 71 + // processing column 11 + // processing column 12 + // got all card + // println!("got all card"); + + // sort low to high == + // + // status 8 (4) + // method 4 (6) + // url 9 (6) + // env 0 (8) + // handler 1 (8) + // role 7 (14) + // user_agent 10 (71) + // path 6 (78) + // nodename 5 (148) + // host 2 (3005) + // hostname 3 (3005) + // + // time 12 + + if packed_columns.len() < 13 { + continue; + } + + println!("length of column s is {:?}", packed_columns.len()); + // let sort = [0, 7, 6, 12]; + // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; + // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; + let sort = [12]; + println!("Starting sort with {:?}", sort); + let now = std::time::Instant::now(); + + delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); + + println!("verifying order"); + let values = packed_columns[12].i64_packer_mut().values(); + let mut last = values[0]; + for i in 
1..values.len() { + assert!(values[i] >= last); + last = values[i]; + } + println!("finished sort in {:?}", now.elapsed()); + + println!("Writing to arrow file!"); + write_arrow_file(schema, packed_columns).unwrap(); + println!("Done!"); + + // if packed_columns.len() < 13 { + // continue; + // } + // println!("length of column s is {:?}", packed_columns.len()); + // // let sort = [0, 7, 6, 12]; + // // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; + // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; + // println!("Starting sort with {:?}", sort); + // let now = std::time::Instant::now(); + // delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); + // println!("finished sort in {:?}", now.elapsed()); + + // let mut table_writer = self + // .table_writer_source + // .next_writer(&schema) + // .context(WriterCreation)?; + + // table_writer + // .write_batch(&packed_columns) + // .context(WriterCreation)?; + // table_writer.close().context(WriterCreation)?; } None => break, } From aba02cb731f533531ae9f639bcd9ac36330092d2 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 13:47:14 +0100 Subject: [PATCH 04/73] feat: basic store --- .gitignore | 2 +- Cargo.lock | 54 ++++- Cargo.toml | 2 + delorean_mem_qe/Cargo.toml | 16 ++ delorean_mem_qe/benches/encoding.rs | 97 +++++++++ delorean_mem_qe/src/bin/main.rs | 93 +++++++++ delorean_mem_qe/src/column.rs | 233 ++++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 299 ++++++++++++++++++++++++++++ delorean_mem_qe/src/lib.rs | 28 +++ delorean_mem_qe/src/segment.rs | 95 +++++++++ 10 files changed, 915 insertions(+), 4 deletions(-) create mode 100644 delorean_mem_qe/Cargo.toml create mode 100644 delorean_mem_qe/benches/encoding.rs create mode 100644 delorean_mem_qe/src/bin/main.rs create mode 100644 delorean_mem_qe/src/column.rs create mode 100644 delorean_mem_qe/src/encoding.rs create mode 100644 delorean_mem_qe/src/lib.rs create mode 100644 delorean_mem_qe/src/segment.rs diff --git a/.gitignore b/.gitignore index 6bab7d0b55..98a48ecef1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/target +**/target **/*.rs.bk .idea/ .env diff --git a/Cargo.lock b/Cargo.lock index 29c2d3710e..328fb59d2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -104,6 +104,42 @@ dependencies = [ "serde_json", ] +[[package]] +name = "arrow" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +dependencies = [ + "arrow-flight", + "chrono", + "csv", + "flatbuffers", + "hex", + "indexmap", + "lazy_static", + "num 0.3.0", + "prettytable-rs", + "rand", + "regex", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "arrow-flight" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +dependencies = [ + "bytes", + "futures", + "proc-macro2", + "prost", + "prost-derive", + "tokio", + "tonic", + "tonic-build", +] + [[package]] name = "assert-json-diff" version = "1.1.0" @@ -611,7 +647,7 @@ name = "datafusion" version = "2.0.0-SNAPSHOT" source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" dependencies = [ - "arrow", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "chrono", "clap", "crossbeam", @@ -637,6 +673,7 @@ dependencies = [ "delorean_generated_types", 
"delorean_ingest", "delorean_line_parser", + "delorean_mem_qe", "delorean_object_store", "delorean_parquet", "delorean_partitioned_store", @@ -677,7 +714,7 @@ dependencies = [ name = "delorean_arrow" version = "0.1.0" dependencies = [ - "arrow", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "datafusion", "parquet", ] @@ -698,6 +735,7 @@ dependencies = [ name = "delorean_ingest" version = "0.1.0" dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "delorean_line_parser", "delorean_table", "delorean_table_schema", @@ -722,6 +760,16 @@ dependencies = [ "snafu", ] +[[package]] +name = "delorean_mem_qe" +version = "0.1.0" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "croaring", + "delorean_table", + "snafu", +] + [[package]] name = "delorean_object_store" version = "0.1.0" @@ -1982,7 +2030,7 @@ name = "parquet" version = "2.0.0-SNAPSHOT" source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" dependencies = [ - "arrow", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "brotli", "byteorder", "chrono", diff --git a/Cargo.toml b/Cargo.toml index a7988b0734..fbc65ec72c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "delorean_ingest", "delorean_line_parser", "delorean_object_store", + "delorean_mem_qe", "delorean_parquet", "delorean_partitioned_store", "delorean_table", @@ -33,6 +34,7 @@ delorean_arrow = { path = "delorean_arrow" } delorean_generated_types = { path = "delorean_generated_types" } delorean_ingest = { path = "delorean_ingest" } delorean_line_parser = { path = "delorean_line_parser" } +delorean_mem_qe = { path = "delorean_mem_qe" } delorean_parquet = { path = "delorean_parquet" } delorean_partitioned_store = { path = "delorean_partitioned_store" } delorean_table = { path = "delorean_table" } diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml new file mode 100644 index 0000000000..aaf38f1b7a --- /dev/null +++ b/delorean_mem_qe/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "delorean_mem_qe" +version = "0.1.0" +authors = ["Edd Robinson "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +delorean_table = { path = "../delorean_table" } +arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +snafu = "0.6.8" +croaring = "0.4.5" + +[dev-dependencies] + diff --git a/delorean_mem_qe/benches/encoding.rs b/delorean_mem_qe/benches/encoding.rs new file mode 100644 index 0000000000..504ce64c8d --- /dev/null +++ b/delorean_mem_qe/benches/encoding.rs @@ -0,0 +1,97 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; + +const BATCH_SIZES: [usize; 5] = [10, 100, 1_000, 10_000, 100_000]; +const CARDINALITIES: [usize; 4] = [1, 5, 10, 100]; + +fn encoding_drle_row_ids_sorted(c: &mut Criterion) { + benchmark_row_ids( + c, + "encoding_drle_row_ids_sorted", + &BATCH_SIZES, + &CARDINALITIES, + ); +} + +fn benchmark_row_ids( + c: &mut Criterion, + benchmark_group_name: &str, + batch_sizes: &[usize], + cardinalities: &[usize], +) { + let mut group = c.benchmark_group(benchmark_group_name); + for 
&batch_size in batch_sizes { + for &cardinality in cardinalities { + let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); + let values = batch_size / cardinality; + for i in 0..cardinality { + input.push_additional(i.to_string().as_str(), values as u64); + } + group.throughput(Throughput::Bytes(batch_size as u64)); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:?}_{:?}", batch_size, cardinality)), + &input, + |b, input| { + b.iter(|| { + // do work + for i in 0..cardinality { + let ids = input + .row_ids(i.to_string().as_str()) + .collect::>(); + } + }); + }, + ); + } + } + group.finish(); +} + +fn encoding_drle_row_ids_sorted_roaring(c: &mut Criterion) { + benchmark_row_ids_roaring( + c, + "encoding_drle_row_ids_sorted_roaring", + &BATCH_SIZES, + &CARDINALITIES, + ); +} + +fn benchmark_row_ids_roaring( + c: &mut Criterion, + benchmark_group_name: &str, + batch_sizes: &[usize], + cardinalities: &[usize], +) { + let mut group = c.benchmark_group(benchmark_group_name); + for &batch_size in batch_sizes { + for &cardinality in cardinalities { + let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); + let values = batch_size / cardinality; + for i in 0..cardinality { + input.push_additional(i.to_string().as_str(), values as u64); + } + group.throughput(Throughput::Bytes(batch_size as u64)); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:?}_{:?}", batch_size, cardinality)), + &input, + |b, input| { + b.iter(|| { + // do work + for i in 0..cardinality { + let ids = input.row_ids_roaring(i.to_string().as_str()); + } + }); + }, + ); + } + } + group.finish(); +} + +criterion_group!( + benches, + encoding_drle_row_ids_sorted, + encoding_drle_row_ids_sorted_roaring +); +criterion_main!(benches); diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs new file mode 100644 index 0000000000..c7bdc932b3 --- /dev/null +++ b/delorean_mem_qe/src/bin/main.rs @@ -0,0 +1,93 @@ +use std::{fs::File, path::Path}; + +use arrow::record_batch::{RecordBatch, RecordBatchReader}; +use arrow::{array, array::Array, datatypes, ipc}; + +use delorean_mem_qe::column; +use delorean_mem_qe::column::Column; +use delorean_mem_qe::segment::Segment; +use delorean_mem_qe::Store; + +// use snafu::ensure; +use snafu::Snafu; + +#[derive(Snafu, Debug, Clone, Copy, PartialEq)] +pub enum Error { + // #[snafu(display(r#"Too many sort columns specified"#))] +// TooManyColumns, + +// #[snafu(display(r#"Same column specified as sort column multiple times"#))] +// RepeatedColumns { index: usize }, + +// #[snafu(display(r#"Specified column index is out bounds"#))] +// OutOfBoundsColumn { index: usize }, +} + +fn main() { + let mut store = Store::default(); + read_arrow_file(&mut store); + + println!( + "total segments {:?} with total size {:?}", + store.segment_total(), + store.size(), + ); +} + +fn read_arrow_file(store: &mut Store) { + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let mut reader = ipc::reader::StreamReader::try_new(r).unwrap(); + while let Some(batch) = reader.next_batch().unwrap() { + let segment = record_batch_to_segment(&batch).unwrap(); + store.add_segment(segment); + } +} + +fn record_batch_to_segment(rb: &RecordBatch) -> Result { + let mut segment = Segment::default(); + + // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); + for (i, column) in rb.columns().iter().enumerate() { + match *column.data_type() { + 
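            // Each Arrow column is converted by data type below: numeric and
            // timestamp arrays are copied out as dense slices, while string
            // (tag) arrays are collapsed into the run-length dictionary
            // encoding as they are scanned.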
datatypes::DataType::Float64 => { + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), column); + } + datatypes::DataType::Int64 => { + let arr = column.as_any().downcast_ref::().unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), column); + } + datatypes::DataType::Utf8 => { + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); + + let mut column = column::String::default(); + let mut prev = arr.value(0); + let mut count = 1_u64; + for j in 1..arr.len() { + let next = arr.value(j); + if prev == next { + count += 1; + } else { + column.add_additional(prev, count); + prev = next; + count = 1; + } + } + segment.add_column(rb.schema().field(i).name(), Column::String(column)); + } + datatypes::DataType::Boolean => { + panic!("unsupported"); + } + _ => panic!("unsupported datatype"), + } + } + Ok(segment) +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs new file mode 100644 index 0000000000..188d0dd193 --- /dev/null +++ b/delorean_mem_qe/src/column.rs @@ -0,0 +1,233 @@ +use std::convert::From; + +use super::encoding; + +#[derive(Debug)] +pub enum Column { + String(String), + Float(Float), + Integer(Integer), +} + +impl Column { + /// Returns the number of logical rows for the column. + pub fn num_rows(&self) -> usize { + match self { + Column::String(c) => c.meta.num_rows(), + Column::Float(c) => c.meta.num_rows(), + Column::Integer(c) => c.meta.num_rows(), + } + } + + // Returns the size of the segment in bytes. + pub fn size(&self) -> usize { + match self { + Column::String(c) => c.size(), + Column::Float(c) => c.size(), + Column::Integer(c) => c.size(), + } + } +} + +impl From<&[f64]> for Column { + fn from(values: &[f64]) -> Self { + Self::Float(Float::from(values)) + } +} + +impl From<&[i64]> for Column { + fn from(values: &[i64]) -> Self { + Self::Integer(Integer::from(values)) + } +} + +#[derive(Debug, Default)] +pub struct String { + meta: metadata::Str, + + // TODO(edd): this would probably have multiple possible encodings + data: encoding::DictionaryRLE, +} + +impl String { + pub fn add(&mut self, s: &str) { + self.meta.add(s); + self.data.push(s); + } + + pub fn add_additional(&mut self, s: &str, additional: u64) { + self.meta.add(s); + self.data.push_additional(s, additional); + } + + pub fn column_range(&self) -> (&str, &str) { + self.meta.range() + } + + pub fn size(&self) -> usize { + self.meta.size() + self.data.size() + } +} + +#[derive(Debug, Default)] +pub struct Float { + meta: metadata::F64, + + // TODO(edd): compression of float columns + data: encoding::PlainFixed, +} + +impl Float { + pub fn column_range(&self) -> (f64, f64) { + self.meta.range() + } + + pub fn size(&self) -> usize { + self.meta.size() + self.data.size() + } +} + +impl From<&[f64]> for Float { + fn from(values: &[f64]) -> Self { + let len = values.len(); + let mut min = std::f64::MAX; + let mut max = std::f64::MIN; + + // calculate min/max for meta data + for v in values { + min = min.min(*v); + max = max.max(*v); + } + + Self { + meta: metadata::F64::new((min, max), len), + data: encoding::PlainFixed::from(values), + } + } +} + +#[derive(Debug, Default)] +pub struct Integer { + meta: metadata::I64, + + // TODO(edd): compression of integers + data: encoding::PlainFixed, +} + +impl Integer { + pub fn column_range(&self) -> (i64, i64) { + self.meta.range() + } + + pub fn 
size(&self) -> usize { + self.meta.size() + self.data.size() + } +} + +impl From<&[i64]> for Integer { + fn from(values: &[i64]) -> Self { + let len = values.len(); + let mut min = std::i64::MAX; + let mut max = std::i64::MIN; + + // calculate min/max for meta data + for v in values { + min = min.min(*v); + max = max.max(*v); + } + + Self { + meta: metadata::I64::new((min, max), len), + data: encoding::PlainFixed::from(values), + } + } +} + +pub mod metadata { + #[derive(Debug, Default)] + pub struct Str { + range: (String, String), + num_rows: usize, + // sparse_index: BTreeMap, + } + + impl Str { + pub fn add(&mut self, s: &str) { + self.num_rows += 1; + + if self.range.0.as_str() > s { + self.range.0 = s.to_owned(); + } + + if self.range.1.as_str() < s { + self.range.1 = s.to_owned(); + } + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn range(&self) -> (&str, &str) { + (&self.range.0, &self.range.1) + } + + pub fn size(&self) -> usize { + self.range.0.len() + self.range.1.len() + std::mem::size_of::() + } + } + + #[derive(Debug, Default)] + pub struct F64 { + range: (f64, f64), + num_rows: usize, + } + + impl F64 { + pub fn new(range: (f64, f64), rows: usize) -> Self { + Self { + range, + num_rows: rows, + } + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn range(&self) -> (f64, f64) { + self.range + } + + pub fn size(&self) -> usize { + std::mem::size_of::<(f64, f64)>() + std::mem::size_of::() + } + } + + #[derive(Debug, Default)] + pub struct I64 { + range: (i64, i64), + num_rows: usize, + } + + impl I64 { + pub fn new(range: (i64, i64), rows: usize) -> Self { + Self { + range, + num_rows: rows, + } + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn range(&self) -> (i64, i64) { + self.range + } + + pub fn size(&self) -> usize { + std::mem::size_of::<(i64, i64)>() + std::mem::size_of::() + } + } +} diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs new file mode 100644 index 0000000000..b760771cdb --- /dev/null +++ b/delorean_mem_qe/src/encoding.rs @@ -0,0 +1,299 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::iter; + +// TODO(edd): this is just for convenience. In reality one would store nulls +// separately and not use `Option`. +#[derive(Debug, Default)] +pub struct PlainFixedOption { + values: Vec>, +} + +impl PlainFixedOption { + pub fn size(&self) -> usize { + self.values.len() * std::mem::size_of::>() + } +} + +#[derive(Debug, Default)] +// No compression +pub struct PlainFixed { + values: Vec, +} + +impl PlainFixed { + pub fn size(&self) -> usize { + self.values.len() * std::mem::size_of::() + } +} + +impl From<&[i64]> for PlainFixed { + fn from(v: &[i64]) -> Self { + Self { values: v.to_vec() } + } +} + +impl From<&[f64]> for PlainFixed { + fn from(v: &[f64]) -> Self { + Self { values: v.to_vec() } + } +} + +#[derive(Debug, Default)] +pub struct DictionaryRLE { + // stores the mapping between an entry and its assigned index. + map: BTreeMap, + map_size: usize, // TODO(edd) this isn't perfect at all + + // stores tuples where each pair refers to a dictionary entry and the number + // of times the entry repeats. 
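+    // For example, pushing "cpu", "cpu", "mem", "cpu" yields a dictionary of
+    // {"cpu": 0, "mem": 1} and run_lengths of [(0, 2), (1, 1), (0, 1)].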
+ run_lengths: Vec<(usize, u64)>, + run_length_size: usize, + + total: u64, +} + +impl DictionaryRLE { + pub fn new() -> Self { + Self { + map: BTreeMap::new(), + map_size: 0, + run_lengths: Vec::new(), + run_length_size: 0, + total: 0, + } + } + + pub fn push(&mut self, v: &str) { + self.push_additional(v, 1); + } + + pub fn push_additional(&mut self, v: &str, additional: u64) { + self.total += additional; + let idx = self.map.get(v); + match idx { + Some(idx) => { + if let Some((last_idx, rl)) = self.run_lengths.last_mut() { + if last_idx == idx { + // update the existing run-length + *rl += additional; + } else { + // start a new run-length + self.run_lengths.push((*idx, additional)); + self.run_length_size += std::mem::size_of::<(usize, u64)>(); + } + } + } + None => { + // New dictionary entry. + if idx.is_none() { + let idx = self.map.len(); + + self.map.insert(String::from(v), idx); + self.map_size += v.len() + std::mem::size_of::(); + + self.run_lengths.push((idx, additional)); + self.run_length_size += std::mem::size_of::<(usize, u64)>(); + return; + } + } + } + } + + // row_ids returns an iterator over the set of row ids matching the provided + // value. + pub fn row_ids(&self, value: &str) -> impl iter::Iterator { + let mut out: Vec = vec![]; + if let Some(idx) = self.map.get(value) { + let mut index: usize = 0; + for (other_idx, other_rl) in &self.run_lengths { + let start = index; + index += *other_rl as usize; + if other_idx == idx { + out.extend(start..index) + } + } + } + out.into_iter() + } + + // row_ids returns an iterator over the set of row ids matching the provided + // value. + pub fn row_ids_roaring(&self, value: &str) -> croaring::Bitmap { + let mut bm = croaring::Bitmap::create(); + if let Some(idx) = self.map.get(value) { + let mut index: u64 = 0; + for (other_idx, other_rl) in &self.run_lengths { + let start = index; + index += other_rl; + if other_idx == idx { + bm.add_range(start..index); + } + } + } + bm + } + + // row_ids returns an iterator over the set of row ids matching the provided + // value + // pub fn row_ids(&'a self, value: &str) -> impl iter::Iterator { + // if let Some(idx) = self.map.get(value) { + // let mut index: usize = 0; + // return self.run_lengths.iter().flat_map(|(other_idx, other_rl)| { + // let start = index; + // index += *other_rl as usize; + + // if other_idx != idx { + // let iter: Box> = Box::new(iter::empty::()); + // return iter; + // } + // Box::new(start..index) + // }); + // } + + // // I need to return the same type as flatten_map or box the flatten_map return and this one?? + // unreachable!("for now"); + // } + + pub fn dictionary(&self) -> BTreeSet { + self.map.keys().cloned().collect::>() + } + + // get the logical value at the provided index, or None if there is no value + // at index. + pub fn value(&self, index: usize) -> Option<&str> { + if index < self.total as usize { + // build reverse mapping. + let mut idx_value = BTreeMap::new(); + for (k, v) in &self.map { + idx_value.insert(v, k.as_str()); + } + assert_eq!(idx_value.len(), self.map.len()); + + let mut total = 0; + for (idx, rl) in &self.run_lengths { + if total + rl > index as u64 { + return idx_value.get(idx).cloned(); + } + total += rl; + } + } + None + } + + // values materialises a vector of references to all logical values in the + // encoding. + pub fn values(&mut self) -> Vec<&str> { + let mut out = Vec::with_capacity(self.total as usize); + + // build reverse mapping. 
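+        // (entry index -> entry); rebuilt from `map` on every call, costing
+        // O(cardinality) before any rows are materialised.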
+        let mut idx_value = BTreeMap::new();
+        for (k, v) in &self.map {
+            idx_value.insert(v, k.as_str());
+        }
+        assert_eq!(idx_value.len(), self.map.len());
+
+        for (idx, rl) in &self.run_lengths {
+            let &v = idx_value.get(&idx).unwrap();
+            out.extend(iter::repeat(&v).take(*rl as usize));
+        }
+        out
+    }
+
+    pub fn size(&self) -> usize {
+        self.map_size + self.run_length_size
+    }
+}
+
+// TODO(edd): improve perf here....
+impl std::convert::From<Vec<&str>> for DictionaryRLE {
+    fn from(vec: Vec<&str>) -> Self {
+        let mut drle = Self::new();
+        for v in vec {
+            drle.push(v);
+        }
+        drle
+    }
+}
+
+// TODO(edd): improve perf here....
+impl std::convert::From<&delorean_table::Packer<delorean_table::ByteArray>> for DictionaryRLE {
+    fn from(p: &delorean_table::Packer<delorean_table::ByteArray>) -> Self {
+        let mut drle = Self::new();
+        for v in p.values() {
+            let s = v
+                .clone()
+                .unwrap_or_else(|| delorean_table::ByteArray::from("NULL"));
+            drle.push(s.as_utf8().unwrap());
+        }
+        drle
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn dict_rle() {
+        let mut drle = super::DictionaryRLE::new();
+        drle.push("hello");
+        drle.push("hello");
+        drle.push("world");
+        drle.push("hello");
+        drle.push("hello");
+        drle.push_additional("hello", 1);
+
+        assert_eq!(
+            drle.values(),
+            ["hello", "hello", "world", "hello", "hello", "hello",]
+        );
+
+        drle.push_additional("zoo", 3);
+        assert_eq!(
+            drle.values(),
+            ["hello", "hello", "world", "hello", "hello", "hello", "zoo", "zoo", "zoo"]
+        );
+
+        assert_eq!(drle.value(0).unwrap(), "hello");
+        assert_eq!(drle.value(1).unwrap(), "hello");
+        assert_eq!(drle.value(2).unwrap(), "world");
+        assert_eq!(drle.value(3).unwrap(), "hello");
+        assert_eq!(drle.value(4).unwrap(), "hello");
+        assert_eq!(drle.value(5).unwrap(), "hello");
+        assert_eq!(drle.value(6).unwrap(), "zoo");
+        assert_eq!(drle.value(7).unwrap(), "zoo");
+        assert_eq!(drle.value(8).unwrap(), "zoo");
+    }
+
+    #[test]
+    fn row_ids() {
+        let mut drle = super::DictionaryRLE::new();
+        drle.push_additional("abc", 3);
+        drle.push_additional("dre", 2);
+        drle.push("abc");
+
+        let ids = drle.row_ids("abc").collect::<Vec<usize>>();
+        assert_eq!(ids, vec![0, 1, 2, 5]);
+
+        let ids = drle.row_ids("dre").collect::<Vec<usize>>();
+        assert_eq!(ids, vec![3, 4]);
+
+        let ids = drle.row_ids("foo").collect::<Vec<usize>>();
+        assert_eq!(ids, vec![]);
+    }
+
+    #[test]
+    fn row_ids_roaring() {
+        let mut drle = super::DictionaryRLE::new();
+        drle.push_additional("abc", 3);
+        drle.push_additional("dre", 2);
+        drle.push("abc");
+
+        let ids = drle.row_ids_roaring("abc").iter().collect::<Vec<u32>>();
+        assert_eq!(ids, vec![0, 1, 2, 5]);
+
+        let ids = drle.row_ids_roaring("dre").iter().collect::<Vec<u32>>();
+        assert_eq!(ids, vec![3, 4]);
+
+        let ids = drle.row_ids_roaring("foo").iter().collect::<Vec<u32>>();
+        assert_eq!(ids, vec![]);
+    }
+}
diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs
new file mode 100644
index 0000000000..8c24ab7c7c
--- /dev/null
+++ b/delorean_mem_qe/src/lib.rs
@@ -0,0 +1,28 @@
+pub mod column;
+pub mod encoding;
+pub mod segment;
+
+use segment::Segment;
+
+#[derive(Debug, Default)]
+pub struct Store {
+    segments: Vec<Segment>,
+
+    store_size: usize,
+}
+
+impl Store {
+    pub fn add_segment(&mut self, segment: Segment) {
+        self.store_size += segment.size();
+        self.segments.push(segment);
+    }
+
+    /// The total size of all segments in the store.
+    pub fn size(&self) -> usize {
+        self.store_size
+    }
+
+    pub fn segment_total(&self) -> usize {
+        self.segments.len()
+    }
+}
diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs
new file mode 100644
index 0000000000..873364de67
--- /dev/null
+++ b/delorean_mem_qe/src/segment.rs
@@ -0,0 +1,95 @@
+use std::collections::BTreeMap;
+
+use super::column;
+use super::column::Column;
+
+#[derive(Debug, Default)]
+pub struct Segment {
+    meta: SegmentMetaData,
+
+    // Columns within a segment
+    columns: Vec<column::Column>,
+    // string_columns: Vec,
+    // f64_columns: Vec,
+}
+
+impl Segment {
+    pub fn new(rows: usize) -> Self {
+        let mut segment = Self::default();
+        segment.meta.rows = rows;
+        segment
+    }
+
+    pub fn num_rows(&self) -> usize {
+        self.meta.rows
+    }
+
+    pub fn column_names(&self) -> Vec<String> {
+        self.meta.column_names.clone()
+    }
+
+    pub fn time_range(&self) -> (i64, i64) {
+        self.meta.time_range
+    }
+
+    pub fn add_column(&mut self, name: &str, c: column::Column) {
+        // TODO(edd) yuk
+        if name == "time" {
+            if let column::Column::Integer(ts) = &c {
+                self.meta.time_range = ts.column_range();
+            } else {
+                panic!("incorrect column type for time");
+            }
+        }
+        self.meta.rows = c.num_rows();
+
+        // validate column doesn't already exist in segment
+        assert!(!self.meta.column_names.contains(&name.to_owned()));
+        self.meta.column_names.push(name.to_owned());
+        self.columns.push(c);
+    }
+
+    // TODO - iterator....
+    pub fn size(&self) -> usize {
+        let mut size = 0;
+        for c in &self.columns {
+            size += c.size();
+        }
+        size
+    }
+
+    // Returns the size of each of the segment's columns in bytes.
+    pub fn column_sizes(&self) -> BTreeMap<String, usize> {
+        let mut column_sizes = BTreeMap::new();
+        let names = self.column_names();
+        for (i, column) in self.columns.iter().enumerate() {
+            match column {
+                Column::String(c) => {
+                    column_sizes.insert(names[i].clone(), c.size());
+                }
+                Column::Float(c) => {
+                    column_sizes.insert(names[i].clone(), c.size());
+                }
+                Column::Integer(c) => {
+                    column_sizes.insert(names[i].clone(), c.size());
+                }
+            }
+        }
+        column_sizes
+    }
+}
+
+/// Meta data for a segment. This data is mainly used to determine if a segment
+/// may contain values for answering a query.
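+//
+// For example, a query over time range [from, to] can skip a whole segment
+// when the ranges are disjoint; a sketch of the check (an
+// `overlaps_time_range` helper along these lines is added in a later patch):
+//
+//     meta.time_range.0 <= to && from <= meta.time_range.1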
+#[derive(Debug, Default)] +pub struct SegmentMetaData { + size: usize, // TODO + rows: usize, + + column_names: Vec, + time_range: (i64, i64), + // TODO column sort order +} + +#[cfg(test)] +mod test {} From 8670af5d30504767393389f2fc100fc7560f544d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 17:07:40 +0100 Subject: [PATCH 05/73] feat: column min --- delorean_mem_qe/src/bin/main.rs | 45 +++++++++++++++++++++++++++------ delorean_mem_qe/src/column.rs | 15 +++++++++++ delorean_mem_qe/src/lib.rs | 6 ++++- delorean_mem_qe/src/segment.rs | 43 ++++++++++++++++++++++++++----- 4 files changed, 93 insertions(+), 16 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index c7bdc932b3..9c5564ea69 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -4,7 +4,7 @@ use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; -use delorean_mem_qe::column::Column; +use delorean_mem_qe::column::{Column, Scalar}; use delorean_mem_qe::segment::Segment; use delorean_mem_qe::Store; @@ -24,26 +24,52 @@ pub enum Error { } fn main() { + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + let mut store = Store::default(); - read_arrow_file(&mut store); + build_store(reader, &mut store).unwrap(); println!( "total segments {:?} with total size {:?}", store.segment_total(), store.size(), ); + + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_min = 0; + for _ in 1..10000 { + let now = std::time::Instant::now(); + let segments = store.segments(); + let min = segments.column_min("time").unwrap(); + total_time += now.elapsed(); + + if let Scalar::Integer(v) = min { + total_min += v + } + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + 10000, + total_time, + total_time / 10000, + total_min + ); + // println!("{:?} min -> {:?} in {:?}", "time", min, elapsed); } -fn read_arrow_file(store: &mut Store) { - let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - let mut reader = ipc::reader::StreamReader::try_new(r).unwrap(); - while let Some(batch) = reader.next_batch().unwrap() { - let segment = record_batch_to_segment(&batch).unwrap(); +fn build_store( + mut reader: arrow::ipc::reader::StreamReader, + store: &mut Store, +) -> Result<(), Error> { + while let Some(rb) = reader.next_batch().unwrap() { + let segment = convert_record_batch(rb)?; store.add_segment(segment); } + Ok(()) } -fn record_batch_to_segment(rb: &RecordBatch) -> Result { +fn convert_record_batch(rb: RecordBatch) -> Result { let mut segment = Segment::default(); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); @@ -54,11 +80,13 @@ fn record_batch_to_segment(rb: &RecordBatch) -> Result { .as_any() .downcast_ref::() .unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { let arr = column.as_any().downcast_ref::().unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } @@ -81,6 +109,7 @@ fn record_batch_to_segment(rb: &RecordBatch) -> Result { count = 1; } } + 
segment.add_column(rb.schema().field(i).name(), Column::String(column)); } datatypes::DataType::Boolean => { diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 188d0dd193..81627dcd11 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -2,6 +2,13 @@ use std::convert::From; use super::encoding; +#[derive(Debug, PartialEq, PartialOrd)] +pub enum Scalar<'a> { + String(&'a str), + Float(f64), + Integer(i64), +} + #[derive(Debug)] pub enum Column { String(String), @@ -27,6 +34,14 @@ impl Column { Column::Integer(c) => c.size(), } } + + pub fn min(&self) -> Scalar { + match self { + Column::String(c) => Scalar::String(c.meta.range().0), + Column::Float(c) => Scalar::Float(c.meta.range().0), + Column::Integer(c) => Scalar::Integer(c.meta.range().0), + } + } } impl From<&[f64]> for Column { diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 8c24ab7c7c..65cc7f21f4 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -2,7 +2,7 @@ pub mod column; pub mod encoding; pub mod segment; -use segment::Segment; +use segment::{Segment, Segments}; #[derive(Debug, Default)] pub struct Store { @@ -25,4 +25,8 @@ impl Store { pub fn segment_total(&self) -> usize { self.segments.len() } + + pub fn segments(&self) -> Segments { + Segments::new(&self.segments) + } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 873364de67..d12dcfe14d 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -9,8 +9,6 @@ pub struct Segment { // Columns within a segment columns: Vec, - // string_columns: Vec, - // f64_columns: Vec, } impl Segment { @@ -24,8 +22,8 @@ impl Segment { self.meta.rows } - pub fn column_names(&self) -> Vec { - self.meta.column_names.clone() + pub fn column_names(&self) -> &[String] { + &self.meta.column_names } pub fn time_range(&self) -> (i64, i64) { @@ -65,13 +63,13 @@ impl Segment { for (i, column) in self.columns.iter().enumerate() { match column { Column::String(c) => { - column_sizes.insert(names[i].clone(), c.size()); + column_sizes.insert(names[i].to_owned(), c.size()); } Column::Float(c) => { - column_sizes.insert(names[i].clone(), c.size()); + column_sizes.insert(names[i].to_owned(), c.size()); } Column::Integer(c) => { - column_sizes.insert(names[i].clone(), c.size()); + column_sizes.insert(names[i].to_owned(), c.size()); } } } @@ -79,6 +77,37 @@ impl Segment { } } +pub struct Segments<'a> { + segments: &'a [Segment], +} + +impl<'a> Segments<'a> { + pub fn new(segments: &'a [Segment]) -> Self { + Self { segments } + } + + /// Returns the minimum value for a column in a set of segments. + pub fn column_min(&self, column_name: &str) -> Option { + if self.segments.is_empty() { + return None; + } + + let mut min_min: Option = None; + for segment in self.segments { + if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { + let min = Some(segment.columns[i].min()); + if min_min.is_none() { + min_min = min + } else if min_min > min { + min_min = min; + } + } + } + + min_min + } +} + /// Meta data for a segment. This data is mainly used to determine if a segment /// may contain value for answering a query. 
#[derive(Debug, Default)] From 527083f7a0657939f517e9eac69231f12c3da359 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 18:32:16 +0100 Subject: [PATCH 06/73] feat: column max and column first --- delorean_mem_qe/src/bin/main.rs | 89 +++++++++++++++++++++++++-------- delorean_mem_qe/src/column.rs | 58 +++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 68 ++++++++++++++++--------- delorean_mem_qe/src/segment.rs | 64 ++++++++++++++++++++++++ 4 files changed, 236 insertions(+), 43 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9c5564ea69..9eb9b584fb 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -36,26 +36,9 @@ fn main() { store.size(), ); - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_min = 0; - for _ in 1..10000 { - let now = std::time::Instant::now(); - let segments = store.segments(); - let min = segments.column_min("time").unwrap(); - total_time += now.elapsed(); - - if let Scalar::Integer(v) = min { - total_min += v - } - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - 10000, - total_time, - total_time / 10000, - total_min - ); - // println!("{:?} min -> {:?} in {:?}", "time", min, elapsed); + // time_column_min_time(&store); + // time_column_max_time(&store); + time_column_first(&store); } fn build_store( @@ -120,3 +103,69 @@ fn convert_record_batch(rb: RecordBatch) -> Result { } Ok(segment) } + +fn time_column_min_time(store: &Store) { + let repeat = 1000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_min = 0; + for _ in 1..repeat { + let now = std::time::Instant::now(); + let segments = store.segments(); + let min = segments.column_min("time").unwrap(); + total_time += now.elapsed(); + + if let Scalar::Integer(v) = min { + total_min += v + } + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_min + ); +} + +fn time_column_max_time(store: &Store) { + let repeat = 1000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + for _ in 1..repeat { + let now = std::time::Instant::now(); + let segments = store.segments(); + let max = segments.column_max("time").unwrap(); + total_time += now.elapsed(); + + if let Scalar::Integer(v) = max { + total_max += v + } + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} + +fn time_column_first(store: &Store) { + let repeat = 100000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + for _ in 1..repeat { + let now = std::time::Instant::now(); + let segments = store.segments(); + let res = segments.first("host").unwrap(); + total_time += now.elapsed(); + total_max += res.0; + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 81627dcd11..e2233e06cc 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -35,6 +35,32 @@ impl Column { } } + pub fn value(&self, row_id: usize) -> Option { + match self { + Column::String(c) => { + if row_id >= self.num_rows() { + return None; + } + if let Some(v) = c.value(row_id) { + return Some(Scalar::String(v)); + }; + None + } + Column::Float(c) => { + if row_id >= self.num_rows() { + return None; + } + 
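                // Float and Integer data is held in dense `PlainFixed`
                // vectors with no null slots, so a bounds-checked row id
                // always has a value; only the String arm above can miss.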
Some(Scalar::Float(c.value(row_id))) + } + Column::Integer(c) => { + if row_id >= self.num_rows() { + return None; + } + Some(Scalar::Integer(c.value(row_id))) + } + } + } + pub fn min(&self) -> Scalar { match self { Column::String(c) => Scalar::String(c.meta.range().0), @@ -42,6 +68,14 @@ impl Column { Column::Integer(c) => Scalar::Integer(c.meta.range().0), } } + + pub fn max(&self) -> Scalar { + match self { + Column::String(c) => Scalar::String(c.meta.range().1), + Column::Float(c) => Scalar::Float(c.meta.range().1), + Column::Integer(c) => Scalar::Integer(c.meta.range().1), + } + } } impl From<&[f64]> for Column { @@ -82,6 +116,10 @@ impl String { pub fn size(&self) -> usize { self.meta.size() + self.data.size() } + + pub fn value(&self, row_id: usize) -> Option<&std::string::String> { + self.data.value(row_id) + } } #[derive(Debug, Default)] @@ -100,6 +138,10 @@ impl Float { pub fn size(&self) -> usize { self.meta.size() + self.data.size() } + + pub fn value(&self, row_id: usize) -> f64 { + self.data.value(row_id) + } } impl From<&[f64]> for Float { @@ -137,6 +179,18 @@ impl Integer { pub fn size(&self) -> usize { self.meta.size() + self.data.size() } + + pub fn value(&self, row_id: usize) -> i64 { + self.data.value(row_id) + } + + /// Find the first logical row that contains this value. + pub fn row_id_for_value(&self, v: i64) -> Option { + if !self.meta.maybe_contains_value(v) { + return None; + } + self.data.row_id_for_value(v) + } } impl From<&[i64]> for Integer { @@ -233,6 +287,10 @@ pub mod metadata { } } + pub fn maybe_contains_value(&self, v: i64) -> bool { + self.range.0 <= v && v <= self.range.1 + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index b760771cdb..e06dac8b3d 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -18,30 +18,53 @@ impl PlainFixedOption { // No compression pub struct PlainFixed { values: Vec, + total_order: bool, // if true the column is totally ordered ascending. } -impl PlainFixed { +impl PlainFixed +where + T: PartialEq + Copy, +{ pub fn size(&self) -> usize { self.values.len() * std::mem::size_of::() } + + pub fn row_id_for_value(&self, v: T) -> Option { + self.values.iter().position(|x| *x == v) + } + + // get value at row_id. Panics if out of bounds. + pub fn value(&self, row_id: usize) -> T { + self.values[row_id] + } } impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { - Self { values: v.to_vec() } + Self { + values: v.to_vec(), + total_order: false, + } } } impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { - Self { values: v.to_vec() } + Self { + values: v.to_vec(), + total_order: false, + } } } #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. - map: BTreeMap, + entry_index: BTreeMap, + + // stores the mapping between an index and its entry. 
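+    // Keeping both directions roughly doubles the dictionary footprint
+    // (`size()` below counts `map_size` twice), in exchange for `value()`
+    // lookups that no longer rebuild a reverse map on every call.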
+ index_entry: BTreeMap, + map_size: usize, // TODO(edd) this isn't perfect at all // stores tuples where each pair refers to a dictionary entry and the number @@ -55,7 +78,8 @@ pub struct DictionaryRLE { impl DictionaryRLE { pub fn new() -> Self { Self { - map: BTreeMap::new(), + entry_index: BTreeMap::new(), + index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), run_length_size: 0, @@ -69,7 +93,7 @@ impl DictionaryRLE { pub fn push_additional(&mut self, v: &str, additional: u64) { self.total += additional; - let idx = self.map.get(v); + let idx = self.entry_index.get(v); match idx { Some(idx) => { if let Some((last_idx, rl)) = self.run_lengths.last_mut() { @@ -86,9 +110,10 @@ impl DictionaryRLE { None => { // New dictionary entry. if idx.is_none() { - let idx = self.map.len(); + let idx = self.entry_index.len(); - self.map.insert(String::from(v), idx); + self.entry_index.insert(String::from(v), idx); + self.index_entry.insert(idx, String::from(v)); self.map_size += v.len() + std::mem::size_of::(); self.run_lengths.push((idx, additional)); @@ -103,7 +128,7 @@ impl DictionaryRLE { // value. pub fn row_ids(&self, value: &str) -> impl iter::Iterator { let mut out: Vec = vec![]; - if let Some(idx) = self.map.get(value) { + if let Some(idx) = self.entry_index.get(value) { let mut index: usize = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -120,7 +145,7 @@ impl DictionaryRLE { // value. pub fn row_ids_roaring(&self, value: &str) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); - if let Some(idx) = self.map.get(value) { + if let Some(idx) = self.entry_index.get(value) { let mut index: u64 = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -155,24 +180,20 @@ impl DictionaryRLE { // } pub fn dictionary(&self) -> BTreeSet { - self.map.keys().cloned().collect::>() + self.entry_index + .keys() + .cloned() + .collect::>() } // get the logical value at the provided index, or None if there is no value // at index. - pub fn value(&self, index: usize) -> Option<&str> { + pub fn value(&self, index: usize) -> Option<&String> { if index < self.total as usize { - // build reverse mapping. - let mut idx_value = BTreeMap::new(); - for (k, v) in &self.map { - idx_value.insert(v, k.as_str()); - } - assert_eq!(idx_value.len(), self.map.len()); - let mut total = 0; for (idx, rl) in &self.run_lengths { if total + rl > index as u64 { - return idx_value.get(idx).cloned(); + return self.index_entry.get(idx); } total += rl; } @@ -187,10 +208,10 @@ impl DictionaryRLE { // build reverse mapping. 
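        // note: unlike `value()` above, this still rebuilds the reverse
        // mapping on every call rather than reading `index_entry`.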
let mut idx_value = BTreeMap::new(); - for (k, v) in &self.map { + for (k, v) in &self.entry_index { idx_value.insert(v, k.as_str()); } - assert_eq!(idx_value.len(), self.map.len()); + assert_eq!(idx_value.len(), self.entry_index.len()); for (idx, rl) in &self.run_lengths { let &v = idx_value.get(&idx).unwrap(); @@ -200,7 +221,8 @@ impl DictionaryRLE { } pub fn size(&self) -> usize { - self.map_size + self.run_length_size + // mapping and reverse mapping then the rles + 2 * self.map_size + self.run_length_size } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index d12dcfe14d..e1ccf413f0 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -9,6 +9,7 @@ pub struct Segment { // Columns within a segment columns: Vec, + time_column_idx: usize, } impl Segment { @@ -26,6 +27,14 @@ impl Segment { &self.meta.column_names } + /// column returns the column with name + pub fn column(&self, name: &str) -> Option<&column::Column> { + if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { + return self.columns.get(*id); + } + None + } + pub fn time_range(&self) -> (i64, i64) { self.meta.time_range } @@ -38,6 +47,7 @@ impl Segment { } else { panic!("incorrect column type for time"); } + self.time_column_idx = self.columns.len(); } self.meta.rows = c.num_rows(); @@ -106,6 +116,60 @@ impl<'a> Segments<'a> { min_min } + + /// Returns the maximum value for a column in a set of segments. + pub fn column_max(&self, column_name: &str) -> Option { + if self.segments.is_empty() { + return None; + } + + let mut max_max: Option = None; + for segment in self.segments { + if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { + let max = Some(segment.columns[i].max()); + if max_max.is_none() { + max_max = max + } else if max_max < max { + max_max = max; + } + } + } + + max_max + } + + /// Returns the first value for a column in a set of segments. + /// + /// TODO(edd): could return NULL value.. + pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + if self.segments.is_empty() { + return None; + } + + let mut first_first: Option<(i64, Option)> = None; + for segment in self.segments { + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwr + let min_ts = ts_col.column_range().0; + let min_ts_id = ts_col.row_id_for_value(min_ts).unwrap(); + + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(min_ts_id); + + match &first_first { + Some(prev) => { + if prev.0 > min_ts { + first_first = Some((min_ts, value)); + } + } + None => first_first = Some((min_ts, value)), + } + } + } + + first_first + } } /// Meta data for a segment. 
This data is mainly used to determine if a segment From a5384d1771644e63848c8470709d7d6500fb6e2c Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 20:15:59 +0100 Subject: [PATCH 07/73] feat: column last and filter by time --- delorean_mem_qe/src/bin/main.rs | 11 ++++-- delorean_mem_qe/src/lib.rs | 2 +- delorean_mem_qe/src/segment.rs | 64 ++++++++++++++++++++++++++++++--- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9eb9b584fb..4a00d0b181 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -38,7 +38,14 @@ fn main() { // time_column_min_time(&store); // time_column_max_time(&store); - time_column_first(&store); + // time_column_first(&store); + let segments = store.segments(); + let res = segments.last("host").unwrap(); + println!("{:?}", res); + + let segments = segments.filter_by_time(1590036110000000, 1590044410000000); + let res = segments.first("env").unwrap(); + println!("{:?}", res); } fn build_store( @@ -52,7 +59,7 @@ fn build_store( Ok(()) } -fn convert_record_batch(rb: RecordBatch) -> Result { +fn convert_record_batch<'a>(rb: RecordBatch) -> Result { let mut segment = Segment::default(); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 65cc7f21f4..26b78d9963 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -27,6 +27,6 @@ impl Store { } pub fn segments(&self) -> Segments { - Segments::new(&self.segments) + Segments::new(self.segments.iter().collect::>()) } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index e1ccf413f0..6804acb2d0 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -88,14 +88,24 @@ impl Segment { } pub struct Segments<'a> { - segments: &'a [Segment], + segments: Vec<&'a Segment>, } impl<'a> Segments<'a> { - pub fn new(segments: &'a [Segment]) -> Self { + pub fn new(segments: Vec<&'a Segment>) -> Self { Self { segments } } + pub fn filter_by_time(&self, min: i64, max: i64) -> Segments<'a> { + let mut segments: Vec<&Segment> = vec![]; + for segment in &self.segments { + if segment.meta.overlaps_time_range(min, max) { + segments.push(segment); + } + } + Self::new(segments) + } + /// Returns the minimum value for a column in a set of segments. pub fn column_min(&self, column_name: &str) -> Option { if self.segments.is_empty() { @@ -103,7 +113,7 @@ impl<'a> Segments<'a> { } let mut min_min: Option = None; - for segment in self.segments { + for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { let min = Some(segment.columns[i].min()); if min_min.is_none() { @@ -124,7 +134,7 @@ impl<'a> Segments<'a> { } let mut max_max: Option = None; - for segment in self.segments { + for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { let max = Some(segment.columns[i].max()); if max_max.is_none() { @@ -147,7 +157,7 @@ impl<'a> Segments<'a> { } let mut first_first: Option<(i64, Option)> = None; - for segment in self.segments { + for segment in &self.segments { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwr @@ -170,6 +180,39 @@ impl<'a> Segments<'a> { first_first } + + /// Returns the last value for a column in a set of segments. 
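+ ///
+ /// Each segment is scanned for the row holding that segment's maximum
+ /// timestamp, and the value whose timestamp is greatest across all
+ /// segments wins.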
+ /// + /// TODO(edd): could return NULL value.. + pub fn last(&self, column_name: &str) -> Option<(i64, Option)> { + if self.segments.is_empty() { + return None; + } + + let mut last_last: Option<(i64, Option)> = None; + for segment in &self.segments { + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwr + let max_ts = ts_col.column_range().1; + let max_ts_id = ts_col.row_id_for_value(max_ts).unwrap(); + + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(max_ts_id); + + match &last_last { + Some(prev) => { + if prev.0 < max_ts { + last_last = Some((max_ts, value)); + } + } + None => last_last = Some((max_ts, value)), + } + } + } + + last_last + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -184,5 +227,16 @@ pub struct SegmentMetaData { // TODO column sort order } +impl SegmentMetaData { + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { + let result = self.time_range.0 <= to && from <= self.time_range.1; + println!( + "segment with ({:?}) overlaps ({:?}, {:?}) -- {:?}", + self.time_range, from, to, result + ); + result + } +} + #[cfg(test)] mod test {} From 69bc0424bf13d733e90fdedfa2c5bb1690f849c1 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 21:17:49 +0100 Subject: [PATCH 08/73] feat: add filter by tag --- delorean_mem_qe/src/bin/main.rs | 19 +++++++++++--- delorean_mem_qe/src/column.rs | 44 +++++++++++++++++++++++++++++++++ delorean_mem_qe/src/segment.rs | 16 ++++++++++++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 4a00d0b181..82e5adc4e4 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -43,7 +43,9 @@ fn main() { let res = segments.last("host").unwrap(); println!("{:?}", res); - let segments = segments.filter_by_time(1590036110000000, 1590044410000000); + let segments = segments + .filter_by_time(1590036110000000, 1590044410000000) + .filter_by_predicate_eq("env", &column::Scalar::String("toolsus1")); let res = segments.first("env").unwrap(); println!("{:?}", res); } @@ -66,6 +68,9 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { for (i, column) in rb.columns().iter().enumerate() { match *column.data_type() { datatypes::DataType::Float64 => { + if column.null_count() > 0 { + panic!("null floats"); + } let arr = column .as_any() .downcast_ref::() @@ -75,18 +80,24 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { + if column.null_count() > 0 { + panic!("null times"); + } let arr = column.as_any().downcast_ref::().unwrap(); let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { + if column.null_count() > 0 { + panic!("null tag"); + } let arr = column .as_any() .downcast_ref::() .unwrap(); - let mut column = column::String::default(); + let mut c = column::String::default(); let mut prev = arr.value(0); let mut count = 1_u64; for j in 1..arr.len() { @@ -94,13 +105,13 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { if prev == next { count += 1; } else { - column.add_additional(prev, count); + c.add_additional(prev, count); prev = next; count = 1; } } - 
segment.add_column(rb.schema().field(i).name(), Column::String(column)); + segment.add_column(rb.schema().field(i).name(), Column::String(c)); } datatypes::DataType::Boolean => { panic!("unsupported"); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index e2233e06cc..3676e13c2d 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -61,6 +61,32 @@ impl Column { } } + pub fn maybe_contains(&self, value: &Scalar) -> bool { + match self { + Column::String(c) => { + if let Scalar::String(v) = value { + c.meta.maybe_contains_value(v.to_string()) + } else { + panic!("invalid value"); + } + } + Column::Float(c) => { + if let Scalar::Float(v) = value { + c.meta.maybe_contains_value(v.to_owned()) + } else { + panic!("invalid value"); + } + } + Column::Integer(c) => { + if let Scalar::Integer(v) = value { + c.meta.maybe_contains_value(v.to_owned()) + } else { + panic!("invalid value"); + } + } + } + } + pub fn min(&self) -> Scalar { match self { Column::String(c) => Scalar::String(c.meta.range().0), @@ -237,6 +263,15 @@ pub mod metadata { self.num_rows } + pub fn maybe_contains_value(&self, v: String) -> bool { + let res = self.range.0 <= v && v <= self.range.1; + println!( + "column with ({:?}) maybe contain {:?} -- {:?}", + self.range, v, res + ); + res + } + pub fn range(&self) -> (&str, &str) { (&self.range.0, &self.range.1) } @@ -260,6 +295,15 @@ pub mod metadata { } } + pub fn maybe_contains_value(&self, v: f64) -> bool { + let res = self.range.0 <= v && v <= self.range.1; + println!( + "column with ({:?}) maybe contain {:?} -- {:?}", + self.range, v, res + ); + res + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 6804acb2d0..1f1084c0e8 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -106,6 +106,22 @@ impl<'a> Segments<'a> { Self::new(segments) } + pub fn filter_by_predicate_eq( + &self, + column_name: &str, + value: &column::Scalar, + ) -> Segments<'a> { + let mut segments: Vec<&Segment> = vec![]; + for segment in &self.segments { + if let Some(col) = segment.column(column_name) { + if col.maybe_contains(&value) { + segments.push(segment); + } + } + } + Self::new(segments) + } + /// Returns the minimum value for a column in a set of segments. 
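// NOTE(editor): a minimal sketch of the pruning rule this patch adds via
// `Column::maybe_contains`, assuming hypothetical string metadata whose
// recorded range is ("aaa", "mmm"):
//
//     "foo": "aaa" <= "foo" && "foo" <= "mmm"  -> true, segment kept
//     "zzz": "zzz" <= "mmm" fails              -> false, segment skipped
//
// true only means the value may be present (false positives are resolved
// by scanning); false proves absence, so the segment is skipped unscanned.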
pub fn column_min(&self, column_name: &str) -> Option { if self.segments.is_empty() { From d43d7bb3d4763bd4b1e0a1731856801f7c1e821b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 5 Aug 2020 22:42:26 +0100 Subject: [PATCH 09/73] feat: nullable tags --- delorean_mem_qe/src/bin/main.rs | 26 +++++--- delorean_mem_qe/src/column.rs | 70 ++++++++++++-------- delorean_mem_qe/src/encoding.rs | 113 ++++++++++++++++++++++---------- delorean_mem_qe/src/segment.rs | 46 ++++++------- 4 files changed, 159 insertions(+), 96 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 82e5adc4e4..ae104d35a8 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -45,8 +45,8 @@ fn main() { let segments = segments .filter_by_time(1590036110000000, 1590044410000000) - .filter_by_predicate_eq("env", &column::Scalar::String("toolsus1")); - let res = segments.first("env").unwrap(); + .filter_by_predicate_eq("env", &column::Scalar::String("prod01-eu-central-1")); + let res = segments.first("env"); println!("{:?}", res); } @@ -61,7 +61,7 @@ fn build_store( Ok(()) } -fn convert_record_batch<'a>(rb: RecordBatch) -> Result { +fn convert_record_batch(rb: RecordBatch) -> Result { let mut segment = Segment::default(); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); @@ -89,23 +89,31 @@ fn convert_record_batch<'a>(rb: RecordBatch) -> Result { segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { - if column.null_count() > 0 { - panic!("null tag"); - } let arr = column .as_any() .downcast_ref::() .unwrap(); let mut c = column::String::default(); - let mut prev = arr.value(0); + let mut prev: Option<&str> = None; + if !column.is_null(0) { + prev = Some(arr.value(0)); + } + let mut count = 1_u64; for j in 1..arr.len() { - let next = arr.value(j); + let mut next = Some(arr.value(j)); + if column.is_null(j) { + next = None; + } + if prev == next { count += 1; } else { - c.add_additional(prev, count); + match prev { + Some(x) => c.add_additional(Some(x.to_string()), count), + None => c.add_additional(None, count), + } prev = next; count = 1; } diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 3676e13c2d..4597cf80c5 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -41,10 +41,11 @@ impl Column { if row_id >= self.num_rows() { return None; } - if let Some(v) = c.value(row_id) { - return Some(Scalar::String(v)); - }; - None + + match c.value(row_id) { + Some(v) => Some(Scalar::String(v)), + None => None, + } } Column::Float(c) => { if row_id >= self.num_rows() { @@ -65,7 +66,7 @@ impl Column { match self { Column::String(c) => { if let Scalar::String(v) = value { - c.meta.maybe_contains_value(v.to_string()) + c.meta.maybe_contains_value(&v.to_string()) } else { panic!("invalid value"); } @@ -87,19 +88,31 @@ impl Column { } } - pub fn min(&self) -> Scalar { + // FIXME(edd): Support NULL integers and floats + pub fn min(&self) -> Option { match self { - Column::String(c) => Scalar::String(c.meta.range().0), - Column::Float(c) => Scalar::Float(c.meta.range().0), - Column::Integer(c) => Scalar::Integer(c.meta.range().0), + Column::String(c) => { + if let Some(min) = c.meta.range().0 { + return Some(Scalar::String(min)); + } + None + } + Column::Float(c) => Some(Scalar::Float(c.meta.range().0)), + Column::Integer(c) => Some(Scalar::Integer(c.meta.range().0)), } } - pub fn max(&self) -> Scalar { + // FIXME(edd): Support NULL integers and floats 
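+ // NOTE(editor): with the Option-based range, Rust's derived ordering
+ // sorts None below any Some(_), so a column containing a NULL naturally
+ // reports None as its minimum. Note, though, that the metadata's default
+ // lower bound is also None and nothing ever compares below it, so as
+ // written min() appears to report None even for NULL-free columns.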
+ pub fn max(&self) -> Option { match self { - Column::String(c) => Scalar::String(c.meta.range().1), - Column::Float(c) => Scalar::Float(c.meta.range().1), - Column::Integer(c) => Scalar::Integer(c.meta.range().1), + Column::String(c) => { + if let Some(max) = c.meta.range().1 { + return Some(Scalar::String(max)); + } + None + } + Column::Float(c) => Some(Scalar::Float(c.meta.range().1)), + Column::Integer(c) => Some(Scalar::Integer(c.meta.range().1)), } } } @@ -126,16 +139,16 @@ pub struct String { impl String { pub fn add(&mut self, s: &str) { - self.meta.add(s); + self.meta.add(Some(s.to_string())); self.data.push(s); } - pub fn add_additional(&mut self, s: &str, additional: u64) { - self.meta.add(s); + pub fn add_additional(&mut self, s: Option, additional: u64) { + self.meta.add(s.clone()); self.data.push_additional(s, additional); } - pub fn column_range(&self) -> (&str, &str) { + pub fn column_range(&self) -> (Option<&std::string::String>, Option<&std::string::String>) { self.meta.range() } @@ -241,21 +254,21 @@ impl From<&[i64]> for Integer { pub mod metadata { #[derive(Debug, Default)] pub struct Str { - range: (String, String), + range: (Option, Option), num_rows: usize, // sparse_index: BTreeMap, } impl Str { - pub fn add(&mut self, s: &str) { + pub fn add(&mut self, s: Option) { self.num_rows += 1; - if self.range.0.as_str() > s { - self.range.0 = s.to_owned(); + if self.range.0 > s { + self.range.0 = s.clone(); } - if self.range.1.as_str() < s { - self.range.1 = s.to_owned(); + if self.range.1 < s { + self.range.1 = s; } } @@ -263,8 +276,8 @@ pub mod metadata { self.num_rows } - pub fn maybe_contains_value(&self, v: String) -> bool { - let res = self.range.0 <= v && v <= self.range.1; + pub fn maybe_contains_value(&self, v: &str) -> bool { + let res = self.range.0 <= Some(v.to_string()) && Some(v.to_string()) <= self.range.1; println!( "column with ({:?}) maybe contain {:?} -- {:?}", self.range, v, res @@ -272,12 +285,13 @@ pub mod metadata { res } - pub fn range(&self) -> (&str, &str) { - (&self.range.0, &self.range.1) + pub fn range(&self) -> (Option<&String>, Option<&String>) { + (self.range.0.as_ref(), self.range.1.as_ref()) } pub fn size(&self) -> usize { - self.range.0.len() + self.range.1.len() + std::mem::size_of::() + // TODO!!!! + 0 //self.range.0.len() + self.range.1.len() + std::mem::size_of::() } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e06dac8b3d..283398223f 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -60,10 +60,10 @@ impl From<&[f64]> for PlainFixed { #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. - entry_index: BTreeMap, + entry_index: BTreeMap, usize>, // stores the mapping between an index and its entry. 
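// NOTE(editor): a minimal usage sketch of the nullable dual-map layout
// (the values are hypothetical):
//
//     let mut drle = DictionaryRLE::new();
//     drle.push_additional(Some("west".to_string()), 2);
//     drle.push_none();
//     drle.push_additional(Some("east".to_string()), 1);
//     // entry_index: {Some("west"): 0, None: 1, Some("east"): 2}
//     // index_entry: {0: Some("west"), 1: None, 2: Some("east")}
//     // run_lengths: [(0, 2), (1, 1), (2, 1)]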
- index_entry: BTreeMap, + index_entry: BTreeMap>, map_size: usize, // TODO(edd) this isn't perfect at all @@ -88,12 +88,16 @@ impl DictionaryRLE { } pub fn push(&mut self, v: &str) { - self.push_additional(v, 1); + self.push_additional(Some(v.to_owned()), 1); } - pub fn push_additional(&mut self, v: &str, additional: u64) { + pub fn push_none(&mut self) { + self.push_additional(None, 1); + } + + pub fn push_additional(&mut self, v: Option, additional: u64) { self.total += additional; - let idx = self.entry_index.get(v); + let idx = self.entry_index.get(&v); match idx { Some(idx) => { if let Some((last_idx, rl)) = self.run_lengths.last_mut() { @@ -112,9 +116,12 @@ impl DictionaryRLE { if idx.is_none() { let idx = self.entry_index.len(); - self.entry_index.insert(String::from(v), idx); - self.index_entry.insert(idx, String::from(v)); - self.map_size += v.len() + std::mem::size_of::(); + self.entry_index.insert(v.clone(), idx); + if let Some(value) = &v { + self.map_size += value.len(); + } + self.index_entry.insert(idx, v); + self.map_size += 8 + std::mem::size_of::(); // TODO(edd): clean this option size up self.run_lengths.push((idx, additional)); self.run_length_size += std::mem::size_of::<(usize, u64)>(); @@ -126,9 +133,9 @@ impl DictionaryRLE { // row_ids returns an iterator over the set of row ids matching the provided // value. - pub fn row_ids(&self, value: &str) -> impl iter::Iterator { + pub fn row_ids(&self, value: Option) -> impl iter::Iterator { let mut out: Vec = vec![]; - if let Some(idx) = self.entry_index.get(value) { + if let Some(idx) = self.entry_index.get(&value) { let mut index: usize = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -143,9 +150,9 @@ impl DictionaryRLE { // row_ids returns an iterator over the set of row ids matching the provided // value. - pub fn row_ids_roaring(&self, value: &str) -> croaring::Bitmap { + pub fn row_ids_roaring(&self, value: Option) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); - if let Some(idx) = self.entry_index.get(value) { + if let Some(idx) = self.entry_index.get(&value) { let mut index: u64 = 0; for (other_idx, other_rl) in &self.run_lengths { let start = index; @@ -179,11 +186,11 @@ impl DictionaryRLE { // unreachable!("for now"); // } - pub fn dictionary(&self) -> BTreeSet { + pub fn dictionary(&self) -> BTreeSet> { self.entry_index .keys() .cloned() - .collect::>() + .collect::>>() } // get the logical value at the provided index, or None if there is no value @@ -193,7 +200,12 @@ impl DictionaryRLE { let mut total = 0; for (idx, rl) in &self.run_lengths { if total + rl > index as u64 { - return self.index_entry.get(idx); + // TODO(edd): Can this really be idiomatic??? + match self.index_entry.get(idx) { + Some(&Some(ref result)) => return Some(result), + Some(&None) => return None, + None => return None, + } } total += rl; } @@ -203,19 +215,20 @@ impl DictionaryRLE { // values materialises a vector of references to all logical values in the // encoding. - pub fn values(&mut self) -> Vec<&str> { - let mut out = Vec::with_capacity(self.total as usize); + pub fn values(&mut self) -> Vec> { + let mut out: Vec> = Vec::with_capacity(self.total as usize); // build reverse mapping. 
let mut idx_value = BTreeMap::new(); for (k, v) in &self.entry_index { - idx_value.insert(v, k.as_str()); + idx_value.insert(v, k); } assert_eq!(idx_value.len(), self.entry_index.len()); for (idx, rl) in &self.run_lengths { - let &v = idx_value.get(&idx).unwrap(); - out.extend(iter::repeat(&v).take(*rl as usize)); + // TODO(edd): fix unwrap - we know that the value exists in map... + let v = idx_value.get(&idx).unwrap().as_ref(); + out.extend(iter::repeat(v).take(*rl as usize)); } out } @@ -261,17 +274,34 @@ mod test { drle.push("world"); drle.push("hello"); drle.push("hello"); - drle.push_additional("hello", 1); + drle.push_additional(Some("hello".to_string()), 1); assert_eq!( drle.values(), - ["hello", "hello", "world", "hello", "hello", "hello",] + [ + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"world".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()) + ] ); - drle.push_additional("zoo", 3); + drle.push_additional(Some("zoo".to_string()), 3); assert_eq!( drle.values(), - ["hello", "hello", "world", "hello", "hello", "hello", "zoo", "zoo", "zoo"] + [ + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"world".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"hello".to_string()), + Some(&"zoo".to_string()), + Some(&"zoo".to_string()), + Some(&"zoo".to_string()), + ] ); assert_eq!(drle.value(0).unwrap(), "hello"); @@ -288,34 +318,49 @@ mod test { #[test] fn row_ids() { let mut drle = super::DictionaryRLE::new(); - drle.push_additional("abc", 3); - drle.push_additional("dre", 2); + drle.push_additional(Some("abc".to_string()), 3); + drle.push_additional(Some("dre".to_string()), 2); drle.push("abc"); - let ids = drle.row_ids("abc").collect::>(); + let ids = drle + .row_ids(Some("abc".to_string())) + .collect::>(); assert_eq!(ids, vec![0, 1, 2, 5]); - let ids = drle.row_ids("dre").collect::>(); + let ids = drle + .row_ids(Some("dre".to_string())) + .collect::>(); assert_eq!(ids, vec![3, 4]); - let ids = drle.row_ids("foo").collect::>(); + let ids = drle + .row_ids(Some("foo".to_string())) + .collect::>(); assert_eq!(ids, vec![]); } #[test] fn row_ids_roaring() { let mut drle = super::DictionaryRLE::new(); - drle.push_additional("abc", 3); - drle.push_additional("dre", 2); + drle.push_additional(Some("abc".to_string()), 3); + drle.push_additional(Some("dre".to_string()), 2); drle.push("abc"); - let ids = drle.row_ids_roaring("abc").iter().collect::>(); + let ids = drle + .row_ids_roaring(Some("abc".to_string())) + .iter() + .collect::>(); assert_eq!(ids, vec![0, 1, 2, 5]); - let ids = drle.row_ids_roaring("dre").iter().collect::>(); + let ids = drle + .row_ids_roaring(Some("dre".to_string())) + .iter() + .collect::>(); assert_eq!(ids, vec![3, 4]); - let ids = drle.row_ids_roaring("foo").iter().collect::>(); + let ids = drle + .row_ids_roaring(Some("foo".to_string())) + .iter() + .collect::>(); assert_eq!(ids, vec![]); } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 1f1084c0e8..4b4a58289a 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -87,6 +87,24 @@ impl Segment { } } +/// Meta data for a segment. This data is mainly used to determine if a segment +/// may contain value for answering a query. 
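+// NOTE(editor): overlaps_time_range below treats both the segment's
+// range and the queried window as closed intervals: [a0, a1] overlaps
+// [b0, b1] iff a0 <= b1 && b0 <= a1, so ranges that merely touch at an
+// endpoint still count as overlapping.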
+#[derive(Debug, Default)] +pub struct SegmentMetaData { + size: usize, // TODO + rows: usize, + + column_names: Vec, + time_range: (i64, i64), + // TODO column sort order +} + +impl SegmentMetaData { + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { + self.time_range.0 <= to && from <= self.time_range.1 + } +} + pub struct Segments<'a> { segments: Vec<&'a Segment>, } @@ -100,6 +118,7 @@ impl<'a> Segments<'a> { let mut segments: Vec<&Segment> = vec![]; for segment in &self.segments { if segment.meta.overlaps_time_range(min, max) { + println!("Segement {:?} overlaps", segment.meta); segments.push(segment); } } @@ -131,7 +150,7 @@ impl<'a> Segments<'a> { let mut min_min: Option = None; for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { - let min = Some(segment.columns[i].min()); + let min = segment.columns[i].min(); if min_min.is_none() { min_min = min } else if min_min > min { @@ -152,7 +171,7 @@ impl<'a> Segments<'a> { let mut max_max: Option = None; for segment in &self.segments { if let Some(i) = segment.column_names().iter().position(|c| c == column_name) { - let max = Some(segment.columns[i].max()); + let max = segment.columns[i].max(); if max_max.is_none() { max_max = max } else if max_max < max { @@ -231,28 +250,5 @@ impl<'a> Segments<'a> { } } -/// Meta data for a segment. This data is mainly used to determine if a segment -/// may contain value for answering a query. -#[derive(Debug, Default)] -pub struct SegmentMetaData { - size: usize, // TODO - rows: usize, - - column_names: Vec, - time_range: (i64, i64), - // TODO column sort order -} - -impl SegmentMetaData { - pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { - let result = self.time_range.0 <= to && from <= self.time_range.1; - println!( - "segment with ({:?}) overlaps ({:?}, {:?}) -- {:?}", - self.time_range, from, to, result - ); - result - } -} - #[cfg(test)] mod test {} From 270bdefcb10f2185ba7657ba77e6a40aa5378340 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 13:16:13 +0100 Subject: [PATCH 10/73] feat: add ability to scan column and materialise --- delorean_mem_qe/src/bin/main.rs | 58 +++++++++------ delorean_mem_qe/src/column.rs | 124 ++++++++++++++++++++++++++++++-- delorean_mem_qe/src/encoding.rs | 121 +++++++++++++++++++++++++++++-- delorean_mem_qe/src/segment.rs | 48 +++++++++---- 4 files changed, 308 insertions(+), 43 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index ae104d35a8..44776f035d 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -43,11 +43,16 @@ fn main() { let res = segments.last("host").unwrap(); println!("{:?}", res); - let segments = segments - .filter_by_time(1590036110000000, 1590044410000000) - .filter_by_predicate_eq("env", &column::Scalar::String("prod01-eu-central-1")); - let res = segments.first("env"); - println!("{:?}", res); + // let segments = segments + // .filter_by_time(1590036110000000, 1590044410000000) + // .filter_by_predicate_eq("env", &column::Scalar::String("prod01-eu-central-1")); + // let res = segments.first( + // "env", + // &column::Scalar::String("prod01-eu-central-1"), + // 1590036110000000, + // ); + // println!("{:?}", res); + // let segments = segments.filter_by_time(1590036110000000, 1590044410000000); } fn build_store( @@ -62,7 +67,7 @@ fn build_store( } fn convert_record_batch(rb: RecordBatch) -> Result { - let mut segment = Segment::default(); + let mut segment = 
Segment::new(rb.num_rows()); // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); for (i, column) in rb.columns().iter().enumerate() { @@ -95,28 +100,37 @@ fn convert_record_batch(rb: RecordBatch) -> Result { .unwrap(); let mut c = column::String::default(); - let mut prev: Option<&str> = None; - if !column.is_null(0) { - prev = Some(arr.value(0)); - } + let mut prev = if !column.is_null(0) { + Some(arr.value(0)) + } else { + None + }; let mut count = 1_u64; for j in 1..arr.len() { - let mut next = Some(arr.value(j)); - if column.is_null(j) { - next = None; - } + let next = if column.is_null(j) { + None + } else { + Some(arr.value(j)) + }; if prev == next { count += 1; - } else { - match prev { - Some(x) => c.add_additional(Some(x.to_string()), count), - None => c.add_additional(None, count), - } - prev = next; - count = 1; + continue; } + + match prev { + Some(x) => c.add_additional(Some(x.to_string()), count), + None => c.add_additional(None, count), + } + prev = next; + count = 1; + } + + // Add final batch to column if any + match prev { + Some(x) => c.add_additional(Some(x.to_string()), count), + None => c.add_additional(None, count), } segment.add_column(rb.schema().field(i).name(), Column::String(c)); @@ -183,7 +197,7 @@ fn time_column_first(store: &Store) { for _ in 1..repeat { let now = std::time::Instant::now(); let segments = store.segments(); - let res = segments.first("host").unwrap(); + let res = segments.first("host", 0).unwrap(); total_time += now.elapsed(); total_max += res.0; } diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 4597cf80c5..727e8f2c2e 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -9,6 +9,13 @@ pub enum Scalar<'a> { Integer(i64), } +#[derive(Debug)] +pub enum Vector<'a> { + String(Vec<&'a Option>), + Float(&'a [f64]), + Integer(&'a [i64]), +} + #[derive(Debug)] pub enum Column { String(String), @@ -62,6 +69,64 @@ impl Column { } } + /// materialise all rows including and after row_id + pub fn scan_from(&self, row_id: usize) -> Option { + if row_id >= self.num_rows() { + println!( + "asking for {:?} but only got {:?} rows", + row_id, + self.num_rows() + ); + return None; + } + + println!( + "asking for {:?} with a column having {:?} rows", + row_id, + self.num_rows() + ); + match self { + Column::String(c) => Some(Vector::String(c.scan_from(row_id))), + Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), + Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + } + } + + /// Given the provided row_id scans the column until a non-null value found + /// or the column is exhausted. 
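+ ///
+ /// E.g. given a column holding [None, None, Some("a"), None], scanning
+ /// from row 0, 1 or 2 yields Some("a"), while scanning from row 3
+ /// yields None.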
+ pub fn scan_from_until_some(&self, row_id: usize) -> Option { + match self { + Column::String(c) => { + if row_id >= self.num_rows() { + return None; + } + + match c.scan_from_until_some(row_id) { + Some(v) => Some(Scalar::String(v)), + None => None, + } + } + Column::Float(c) => { + if row_id >= self.num_rows() { + return None; + } + match c.scan_from_until_some(row_id) { + Some(v) => Some(Scalar::Float(v)), + None => None, + } + } + Column::Integer(c) => { + if row_id >= self.num_rows() { + return None; + } + match c.scan_from_until_some(row_id) { + Some(v) => Some(Scalar::Integer(v)), + None => None, + } + } + } + } + pub fn maybe_contains(&self, value: &Scalar) -> bool { match self { Column::String(c) => { @@ -144,7 +209,7 @@ impl String { } pub fn add_additional(&mut self, s: Option, additional: u64) { - self.meta.add(s.clone()); + self.meta.add_repeated(s.clone(), additional as usize); self.data.push_additional(s, additional); } @@ -159,6 +224,15 @@ impl String { pub fn value(&self, row_id: usize) -> Option<&std::string::String> { self.data.value(row_id) } + + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { + self.data.scan_from(row_id) + } + + pub fn scan_from_until_some(&self, row_id: usize) -> Option<&std::string::String> { + unreachable!("don't need this"); + // self.data.scan_from_until_some(row_id) + } } #[derive(Debug, Default)] @@ -181,6 +255,14 @@ impl Float { pub fn value(&self, row_id: usize) -> f64 { self.data.value(row_id) } + + pub fn scan_from(&self, row_id: usize) -> &[f64] { + self.data.scan_from(row_id) + } + + pub fn scan_from_until_some(&self, row_id: usize) -> Option { + self.data.scan_from_until_some(row_id) + } } impl From<&[f64]> for Float { @@ -223,12 +305,28 @@ impl Integer { self.data.value(row_id) } + pub fn scan_from(&self, row_id: usize) -> &[i64] { + self.data.scan_from(row_id) + } + + pub fn scan_from_until_some(&self, row_id: usize) -> Option { + self.data.scan_from_until_some(row_id) + } + /// Find the first logical row that contains this value. 
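// NOTE(editor): e.g. for a time column holding [10, 20, 30],
// row_id_eq_value(20) is Some(1), row_id_ge_value(25) (added below) is
// Some(2), and row_id_ge_value(40) is None because the metadata max of
// 30 proves no row can qualify before any scan happens.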
- pub fn row_id_for_value(&self, v: i64) -> Option { + pub fn row_id_eq_value(&self, v: i64) -> Option { if !self.meta.maybe_contains_value(v) { return None; } - self.data.row_id_for_value(v) + self.data.row_id_eq_value(v) + } + + /// Find the first logical row that contains a value >= v + pub fn row_id_ge_value(&self, v: i64) -> Option { + if self.meta.max() < v { + return None; + } + self.data.row_id_ge_value(v) } } @@ -263,11 +361,23 @@ pub mod metadata { pub fn add(&mut self, s: Option) { self.num_rows += 1; - if self.range.0 > s { + if s < self.range.0 { self.range.0 = s.clone(); } - if self.range.1 < s { + if s > self.range.1 { + self.range.1 = s; + } + } + + pub fn add_repeated(&mut self, s: Option, additional: usize) { + self.num_rows += additional; + + if s < self.range.0 { + self.range.0 = s.clone(); + } + + if s > self.range.1 { self.range.1 = s; } } @@ -349,6 +459,10 @@ pub mod metadata { self.range.0 <= v && v <= self.range.1 } + pub fn max(&self) -> i64 { + self.range.1 + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 283398223f..596001d379 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -23,20 +23,37 @@ pub struct PlainFixed { impl PlainFixed where - T: PartialEq + Copy, + T: PartialEq + PartialOrd + Copy + std::fmt::Debug, { pub fn size(&self) -> usize { self.values.len() * std::mem::size_of::() } - pub fn row_id_for_value(&self, v: T) -> Option { + pub fn row_id_eq_value(&self, v: T) -> Option { self.values.iter().position(|x| *x == v) } + pub fn row_id_ge_value(&self, v: T) -> Option { + self.values.iter().position(|x| *x >= v) + } + // get value at row_id. Panics if out of bounds. pub fn value(&self, row_id: usize) -> T { self.values[row_id] } + + // TODO(edd): fix this when added NULL support + pub fn scan_from_until_some(&self, row_id: usize) -> Option { + unreachable!("to remove"); + // for v in self.values.iter().skip(row_id) { + // return Some(*v); + // } + // None + } + + pub fn scan_from(&self, row_id: usize) -> &[T] { + &self.values[row_id..] + } } impl From<&[i64]> for PlainFixed { @@ -213,6 +230,60 @@ impl DictionaryRLE { None } + // materialise a slice of rows starting from index. + pub fn scan_from(&self, index: usize) -> Vec<&Option> { + let mut result = vec![]; + if index >= self.total as usize { + return result; + } + + let start_row_id = index as u64; + + let mut curr_row_id = 0_u64; // this tracks the logical row id. + for (idx, rl) in &self.run_lengths { + // Fast path - at this point we are just materialising the RLE + // contents. + if curr_row_id > start_row_id { + let row_entry = self.index_entry.get(idx).unwrap(); + result.extend(vec![row_entry; *rl as usize]); + curr_row_id += rl; + continue; + } + + // Once we have reached the desired starting row_id we can emit values. + if (curr_row_id + *rl) >= start_row_id { + // Since it's unlikely that the desired row falls on a new RLE + // boundary we need to account for a partial RLE entry and only + // populate some of the remaining entry + let remainder = (curr_row_id + rl) - start_row_id; + let row_entry = self.index_entry.get(idx).unwrap(); + result.extend(vec![row_entry; remainder as usize]); + } + + // move onto next RLE entry. + curr_row_id += *rl; + } + result + } + + // // get the logical value at the provided index, or scan to the next value + // // that is non-null. 
+ // pub fn scan_from_until_some(&self, index: usize) -> Option<&String> { + // if index < self.total as usize { + // let mut total = 0; + // for (idx, rl) in &self.run_lengths { + // if total + rl > index as u64 { + // // If there is a value then return otherwise continue. + // if let Some(v) = self.index_entry.get(idx) { + // return v.as_ref(); + // } + // } + // total += rl; + // } + // } + // None + // } + // values materialises a vector of references to all logical values in the // encoding. pub fn values(&mut self) -> Vec> { @@ -315,6 +386,46 @@ mod test { assert_eq!(drle.value(8).unwrap(), "zoo"); } + #[test] + fn dict_rle_scan_from() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(east.clone(), 2); + drle.push_additional(north.clone(), 4); + + // all entries + let results = drle.scan_from(0); + let mut exp = vec![&west; 3]; + exp.extend(vec![&east; 2].iter()); + exp.extend(vec![&north; 4].iter()); + assert_eq!(results, exp); + + // partial results from an RLE entry + let results = drle.scan_from(2); + let mut exp = vec![&west; 1]; // notice partial results + exp.extend(vec![&east; 2].iter()); + exp.extend(vec![&north; 4].iter()); + assert_eq!(results, exp); + + // right on a boundary + let results = drle.scan_from(3); + let mut exp = vec![&east; 2]; + exp.extend(vec![&north; 4].iter()); + assert_eq!(results, exp); + + // partial final result + let results = drle.scan_from(6); + assert_eq!(results, vec![&north; 3]); + + // out of bounds + let results = drle.scan_from(9); + let exp: Vec<&Option> = vec![]; + assert_eq!(results, exp); + } + #[test] fn row_ids() { let mut drle = super::DictionaryRLE::new(); @@ -335,7 +446,8 @@ mod test { let ids = drle .row_ids(Some("foo".to_string())) .collect::>(); - assert_eq!(ids, vec![]); + let empty: Vec = vec![]; + assert_eq!(ids, empty); } #[test] @@ -361,6 +473,7 @@ mod test { .row_ids_roaring(Some("foo".to_string())) .iter() .collect::>(); - assert_eq!(ids, vec![]); + let empty: Vec = vec![]; + assert_eq!(ids, empty); } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 4b4a58289a..94953000a4 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use super::column; use super::column::Column; -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Segment { meta: SegmentMetaData, @@ -14,9 +14,13 @@ pub struct Segment { impl Segment { pub fn new(rows: usize) -> Self { - let mut segment = Self::default(); - segment.meta.rows = rows; - segment + let mut meta = SegmentMetaData::default(); + meta.rows = rows; + Self { + meta, + columns: vec![], + time_column_idx: 0, + } } pub fn num_rows(&self) -> usize { @@ -40,6 +44,15 @@ impl Segment { } pub fn add_column(&mut self, name: &str, c: column::Column) { + assert_eq!( + self.meta.rows, + c.num_rows(), + "Column {:?} has {:?} rows but wanted {:?}", + name, + c.num_rows(), + self.meta.rows + ); + // TODO(edd) yuk if name == "time" { if let column::Column::Integer(ts) = &c { @@ -49,7 +62,6 @@ impl Segment { } self.time_column_idx = self.columns.len(); } - self.meta.rows = c.num_rows(); // validate column doesn't already exist in segment assert!(!self.meta.column_names.contains(&name.to_owned())); @@ -85,6 +97,13 @@ impl Segment { } column_sizes } + + pub fn scan_from(&self, column_name: &str, row_id: usize) -> Option { 
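+ // Materialise all remaining rows of the named column starting at
+ // row_id; returns None if the column is unknown or row_id is beyond
+ // the segment.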
+ if let Some(i) = self.column_names().iter().position(|c| c == column_name) { + return self.columns[i].scan_from(row_id); + } + None + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -114,11 +133,14 @@ impl<'a> Segments<'a> { Self { segments } } + pub fn segments(&self) -> &Vec<&'a Segment> { + &self.segments + } + pub fn filter_by_time(&self, min: i64, max: i64) -> Segments<'a> { let mut segments: Vec<&Segment> = vec![]; for segment in &self.segments { if segment.meta.overlaps_time_range(min, max) { - println!("Segement {:?} overlaps", segment.meta); segments.push(segment); } } @@ -186,7 +208,7 @@ impl<'a> Segments<'a> { /// Returns the first value for a column in a set of segments. /// /// TODO(edd): could return NULL value.. - pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + pub fn first(&self, column_name: &str, min_ts: i64) -> Option<(i64, Option)> { if self.segments.is_empty() { return None; } @@ -195,12 +217,14 @@ impl<'a> Segments<'a> { for segment in &self.segments { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { - // TODO(edd): clean up unwr - let min_ts = ts_col.column_range().0; - let min_ts_id = ts_col.row_id_for_value(min_ts).unwrap(); + let first_ts_id = ts_col.row_id_ge_value(min_ts)?; + println!("first ts is {:?}", first_ts_id); // now we have row id we can get value for that row id - let value = segment.column(column_name).unwrap().value(min_ts_id); + let value = segment + .column(column_name) + .unwrap() + .scan_from_until_some(first_ts_id); match &first_first { Some(prev) => { @@ -230,7 +254,7 @@ impl<'a> Segments<'a> { if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwr let max_ts = ts_col.column_range().1; - let max_ts_id = ts_col.row_id_for_value(max_ts).unwrap(); + let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); // now we have row id we can get value for that row id let value = segment.column(column_name).unwrap().value(max_ts_id); From 150a5a9c81ee7d9377e1b8675aca3568c1ff8f04 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 13:51:17 +0100 Subject: [PATCH 11/73] feat: get row by id --- delorean_mem_qe/src/bin/main.rs | 26 ++++++- delorean_mem_qe/src/column.rs | 2 + delorean_mem_qe/src/segment.rs | 131 ++++++++++++++++++-------------- 3 files changed, 102 insertions(+), 57 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 44776f035d..92c0144c60 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -53,6 +53,10 @@ fn main() { // ); // println!("{:?}", res); // let segments = segments.filter_by_time(1590036110000000, 1590044410000000); + // println!("{:?}", segments.last("host")); + // println!("{:?}", segments.segments().last().unwrap().row(14899)); + + time_row_by_id(&store, 14899); } fn build_store( @@ -197,7 +201,7 @@ fn time_column_first(store: &Store) { for _ in 1..repeat { let now = std::time::Instant::now(); let segments = store.segments(); - let res = segments.first("host", 0).unwrap(); + let res = segments.first("host").unwrap(); total_time += now.elapsed(); total_max += res.0; } @@ -209,3 +213,23 @@ fn time_column_first(store: &Store) { total_max ); } + +fn time_row_by_id(store: &Store, row_id: usize) { + let repeat = 100000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + for _ in 1..repeat { + 
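// NOTE(editor): `1..repeat` performs repeat - 1 iterations while the
// average printed below divides by `repeat`, slightly understating the
// per-call time; the next patch switches this loop to `0..repeat`.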
let now = std::time::Instant::now(); + let segments = store.segments(); + let res = segments.segments().last().unwrap().row(row_id).unwrap(); + total_time += now.elapsed(); + total_max += res.len(); + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 727e8f2c2e..524d2b17b7 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -153,6 +153,7 @@ impl Column { } } + /// Returns the minimum value contained within this column. // FIXME(edd): Support NULL integers and floats pub fn min(&self) -> Option { match self { @@ -167,6 +168,7 @@ impl Column { } } + /// Returns the maximum value contained within this column. // FIXME(edd): Support NULL integers and floats pub fn max(&self) -> Option { match self { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 94953000a4..0b07ff2dbf 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -98,12 +98,25 @@ impl Segment { column_sizes } - pub fn scan_from(&self, column_name: &str, row_id: usize) -> Option { + pub fn scan_column_from(&self, column_name: &str, row_id: usize) -> Option { if let Some(i) = self.column_names().iter().position(|c| c == column_name) { return self.columns[i].scan_from(row_id); } None } + + pub fn row(&self, row_id: usize) -> Option>> { + if row_id >= self.num_rows() { + return None; + } + + Some( + self.columns + .iter() + .map(|c| c.value(row_id)) + .collect::>>(), + ) + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -137,6 +150,14 @@ impl<'a> Segments<'a> { &self.segments } + pub fn is_empty(&self) -> bool { + self.segments.is_empty() + } + + pub fn len(&self) -> usize { + self.segments.len() + } + pub fn filter_by_time(&self, min: i64, max: i64) -> Segments<'a> { let mut segments: Vec<&Segment> = vec![]; for segment in &self.segments { @@ -207,70 +228,68 @@ impl<'a> Segments<'a> { /// Returns the first value for a column in a set of segments. /// + /// The first value is based on the time column, therefore the returned value + /// may not be at the end of the column. + /// + /// If the time column has multiple max time values then the result is abitrary. + /// /// TODO(edd): could return NULL value.. - pub fn first(&self, column_name: &str, min_ts: i64) -> Option<(i64, Option)> { - if self.segments.is_empty() { - return None; + pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + // First let's find the segment with the latest time range. + // notice we order a < b on max time range. 
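// NOTE(editor): "latest"/"max" in the two comments above looks like a
// copy-paste slip from last(): first() wants the segment with the
// earliest time range, which is why min_by compares time_range.0 (the
// minimum timestamp) below.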
+ let segment = self + .segments + .iter() + .min_by(|a, b| a.meta.time_range.0.cmp(&b.meta.time_range.0))?; + + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwrap + let min_ts = ts_col.column_range().0; + assert_eq!(min_ts, segment.meta.time_range.0); + + let min_ts_id = ts_col.row_id_eq_value(min_ts).unwrap(); + + println!("first ts is {:?} at row {:?}", min_ts, min_ts_id); + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(min_ts_id); + Some((min_ts, value)) + } else { + panic!("time column wrong type!"); } - - let mut first_first: Option<(i64, Option)> = None; - for segment in &self.segments { - // first find the logical row id of the minimum timestamp value - if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { - let first_ts_id = ts_col.row_id_ge_value(min_ts)?; - - println!("first ts is {:?}", first_ts_id); - // now we have row id we can get value for that row id - let value = segment - .column(column_name) - .unwrap() - .scan_from_until_some(first_ts_id); - - match &first_first { - Some(prev) => { - if prev.0 > min_ts { - first_first = Some((min_ts, value)); - } - } - None => first_first = Some((min_ts, value)), - } - } - } - - first_first } /// Returns the last value for a column in a set of segments. /// + /// The last value is based on the time column, therefore the returned value + /// may not be at the end of the column. + /// + /// If the time column has multiple max time values then the result is abitrary. + /// /// TODO(edd): could return NULL value.. pub fn last(&self, column_name: &str) -> Option<(i64, Option)> { - if self.segments.is_empty() { - return None; + // First let's find the segment with the latest time range. + // notice we order a > b on max time range. 
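// NOTE(editor): a sketch with two hypothetical segments whose time
// ranges are (100, 200) and (150, 300): first() above consults
// (100, 200), the smallest minimum, while last() below consults
// (150, 300), the largest maximum; ties between segments are broken
// arbitrarily, as the doc comments note.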
+ let segment = self + .segments + .iter() + .max_by(|a, b| a.meta.time_range.1.cmp(&b.meta.time_range.1))?; + + // first find the logical row id of the minimum timestamp value + if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { + // TODO(edd): clean up unwrap + let max_ts = ts_col.column_range().1; + assert_eq!(max_ts, segment.meta.time_range.1); + + let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); + + println!("last ts is {:?} at row {:?}", max_ts, max_ts_id); + // now we have row id we can get value for that row id + let value = segment.column(column_name).unwrap().value(max_ts_id); + Some((max_ts, value)) + } else { + panic!("time column wrong type!"); } - - let mut last_last: Option<(i64, Option)> = None; - for segment in &self.segments { - // first find the logical row id of the minimum timestamp value - if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { - // TODO(edd): clean up unwr - let max_ts = ts_col.column_range().1; - let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); - - // now we have row id we can get value for that row id - let value = segment.column(column_name).unwrap().value(max_ts_id); - - match &last_last { - Some(prev) => { - if prev.0 < max_ts { - last_last = Some((max_ts, value)); - } - } - None => last_last = Some((max_ts, value)), - } - } - } - - last_last } } From ee8ac1b909bb4cdd2a6280de6ff61e49c0e6fcb7 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 14:43:02 +0100 Subject: [PATCH 12/73] refactor: return row id --- delorean_mem_qe/src/bin/main.rs | 10 ++++++---- delorean_mem_qe/src/segment.rs | 10 ++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 92c0144c60..f8634fc053 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -56,7 +56,7 @@ fn main() { // println!("{:?}", segments.last("host")); // println!("{:?}", segments.segments().last().unwrap().row(14899)); - time_row_by_id(&store, 14899); + time_row_by_last_ts(&store); } fn build_store( @@ -214,13 +214,15 @@ fn time_column_first(store: &Store) { ); } -fn time_row_by_id(store: &Store, row_id: usize) { +fn time_row_by_last_ts(store: &Store) { let repeat = 100000; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; - for _ in 1..repeat { + let segments = store.segments(); + for _ in 0..repeat { let now = std::time::Instant::now(); - let segments = store.segments(); + + let (_, _, row_id) = segments.last("time").unwrap(); let res = segments.segments().last().unwrap().row(row_id).unwrap(); total_time += now.elapsed(); total_max += res.len(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 0b07ff2dbf..716392e5d1 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -234,7 +234,7 @@ impl<'a> Segments<'a> { /// If the time column has multiple max time values then the result is abitrary. /// /// TODO(edd): could return NULL value.. - pub fn first(&self, column_name: &str) -> Option<(i64, Option)> { + pub fn first(&self, column_name: &str) -> Option<(i64, Option, usize)> { // First let's find the segment with the latest time range. // notice we order a < b on max time range. 
let segment = self @@ -250,10 +250,9 @@ impl<'a> Segments<'a> { let min_ts_id = ts_col.row_id_eq_value(min_ts).unwrap(); - println!("first ts is {:?} at row {:?}", min_ts, min_ts_id); // now we have row id we can get value for that row id let value = segment.column(column_name).unwrap().value(min_ts_id); - Some((min_ts, value)) + Some((min_ts, value, min_ts_id)) } else { panic!("time column wrong type!"); } @@ -267,7 +266,7 @@ impl<'a> Segments<'a> { /// If the time column has multiple max time values then the result is abitrary. /// /// TODO(edd): could return NULL value.. - pub fn last(&self, column_name: &str) -> Option<(i64, Option)> { + pub fn last(&self, column_name: &str) -> Option<(i64, Option, usize)> { // First let's find the segment with the latest time range. // notice we order a > b on max time range. let segment = self @@ -283,10 +282,9 @@ impl<'a> Segments<'a> { let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); - println!("last ts is {:?} at row {:?}", max_ts, max_ts_id); // now we have row id we can get value for that row id let value = segment.column(column_name).unwrap().value(max_ts_id); - Some((max_ts, value)) + Some((max_ts, value, max_ts_id)) } else { panic!("time column wrong type!"); } From e3e1611e821c78814d4124035b75c3a8ed14959b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 17:05:39 +0100 Subject: [PATCH 13/73] feat: predicate pushdown --- delorean_mem_qe/src/bin/main.rs | 75 +++++++++++++++- delorean_mem_qe/src/column.rs | 149 ++++++++++++++++++++++++++++---- delorean_mem_qe/src/encoding.rs | 77 ++++++++++++++++- delorean_mem_qe/src/segment.rs | 93 ++++++++++++++++---- 4 files changed, 358 insertions(+), 36 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index f8634fc053..91f36dfcd5 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -56,7 +56,35 @@ fn main() { // println!("{:?}", segments.last("host")); // println!("{:?}", segments.segments().last().unwrap().row(14899)); - time_row_by_last_ts(&store); + // time_row_by_last_ts(&store); + + let rows = segments + .segments() + .last() + .unwrap() + .filter_by_predicate_eq( + Some((1590040770000000, 1590040790000000)), + vec![ + ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + ("method", Some(&column::Scalar::String("GET"))), + ( + "host", + Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + ), + ], + ) + .unwrap(); + + for row_id in rows.iter() { + println!( + "{:?} - {:?}", + row_id, + segments.segments().last().unwrap().row(row_id as usize) + ); + } + println!("{:?}", rows.cardinality()); + + time_row_by_preds(&store); } fn build_store( @@ -235,3 +263,48 @@ fn time_row_by_last_ts(store: &Store) { total_max ); } + +fn time_row_by_preds(store: &Store) { + let repeat = 100000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let rows = segments + .segments() + .last() + .unwrap() + .filter_by_predicate_eq( + Some((1590040770000000, 1590040790000000)), + vec![ + ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + ("method", Some(&column::Scalar::String("GET"))), + ( + "host", + Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + ), + ], + ) + .unwrap(); + + // for row_id in rows.iter() { + // println!( + // "{:?} - {:?}", + // row_id, + // segments.segments().last().unwrap().row(row_id as usize) + // ); + // } + + 
total_time += now.elapsed(); + total_max += rows.cardinality(); + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 524d2b17b7..bf1ca53c8f 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -127,27 +127,89 @@ impl Column { } } - pub fn maybe_contains(&self, value: &Scalar) -> bool { + pub fn maybe_contains(&self, value: Option<&Scalar>) -> bool { match self { - Column::String(c) => { - if let Scalar::String(v) = value { - c.meta.maybe_contains_value(&v.to_string()) - } else { - panic!("invalid value"); + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + c.meta.maybe_contains_value(Some(v.to_string())) + } else { + panic!("invalid value"); + } } - } + None => c.meta.maybe_contains_value(None), + }, Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Some(Scalar::Float(v)) = value { c.meta.maybe_contains_value(v.to_owned()) } else { - panic!("invalid value"); + panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Some(Scalar::Integer(v)) = value { c.meta.maybe_contains_value(v.to_owned()) } else { - panic!("invalid value"); + panic!("invalid value or unsupported null"); + } + } + } + } + + /// returns true if the column cannot contain + pub fn max_less_than(&self, value: Option<&Scalar>) -> bool { + match self { + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + c.meta.range().1 < Some(&v.to_string()) + } else { + panic!("invalid value"); + } + } + None => c.meta.range().1 < None, + }, + Column::Float(c) => { + if let Some(Scalar::Float(v)) = value { + c.meta.range().1 < *v + } else { + panic!("invalid value or unsupported null"); + } + } + Column::Integer(c) => { + if let Some(Scalar::Integer(v)) = value { + c.meta.range().1 < *v + } else { + panic!("invalid value or unsupported null"); + } + } + } + } + + pub fn min_greater_than(&self, value: Option<&Scalar>) -> bool { + match self { + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + c.meta.range().0 > Some(&v.to_string()) + } else { + panic!("invalid value"); + } + } + None => c.meta.range().0 > None, + }, + Column::Float(c) => { + if let Some(Scalar::Float(v)) = value { + c.meta.range().0 > *v + } else { + panic!("invalid value or unsupported null"); + } + } + Column::Integer(c) => { + if let Some(Scalar::Integer(v)) = value { + c.meta.range().0 > *v + } else { + panic!("invalid value or unsupported null"); } } } @@ -182,6 +244,62 @@ impl Column { Column::Integer(c) => Some(Scalar::Integer(c.meta.range().1)), } } + + // TODO(edd) shouldn't let roaring stuff leak out... + pub fn row_ids_eq(&self, value: Option<&Scalar>) -> Option { + if !self.maybe_contains(value) { + return None; + } + self.row_ids(value, std::cmp::Ordering::Equal) + } + + pub fn row_ids_gt(&self, value: Option<&Scalar>) -> Option { + if self.max_less_than(value) { + return None; + } + self.row_ids(value, std::cmp::Ordering::Greater) + } + + pub fn row_ids_lt(&self, value: Option<&Scalar>) -> Option { + if self.min_greater_than(value) { + return None; + } + self.row_ids(value, std::cmp::Ordering::Less) + } + + // TODO(edd) shouldn't let roaring stuff leak out... 
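+ // NOTE(editor): the three public variants above pair a metadata
+ // pre-check with the scan below: row_ids_eq consults maybe_contains,
+ // row_ids_gt consults max_less_than and row_ids_lt consults
+ // min_greater_than, each returning None without touching the data when
+ // the check proves no row can match. Also note that for String columns
+ // the private row_ids below ignores `order` and always performs an
+ // equality match against the dictionary.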
+ fn row_ids( + &self, + value: Option<&Scalar>, + order: std::cmp::Ordering, + ) -> Option { + match self { + Column::String(c) => match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + Some(c.data.row_ids_roaring(Some(v.to_string()))) + } else { + panic!("invalid value"); + } + } + None => Some(c.data.row_ids_roaring(None)), + }, + Column::Float(c) => { + if let Some(Scalar::Float(v)) = value { + Some(c.data.row_ids_roaring(v, order)) + } else { + panic!("invalid value or unsupported null"); + } + } + Column::Integer(c) => { + if let Some(Scalar::Integer(v)) = value { + Some(c.data.row_ids_roaring(v, order)) + } else { + panic!("invalid value or unsupported null"); + } + } + } + } } impl From<&[f64]> for Column { @@ -388,13 +506,8 @@ pub mod metadata { self.num_rows } - pub fn maybe_contains_value(&self, v: &str) -> bool { - let res = self.range.0 <= Some(v.to_string()) && Some(v.to_string()) <= self.range.1; - println!( - "column with ({:?}) maybe contain {:?} -- {:?}", - self.range, v, res - ); - res + pub fn maybe_contains_value(&self, v: Option) -> bool { + self.range.0 <= v && v <= self.range.1 } pub fn range(&self) -> (Option<&String>, Option<&String>) { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 596001d379..cb33423e2b 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -54,6 +54,40 @@ where pub fn scan_from(&self, row_id: usize) -> &[T] { &self.values[row_id..] } + + /// returns a set of row ids that match an ordering on a desired value + pub fn row_ids_roaring(&self, wanted: &T, order: std::cmp::Ordering) -> croaring::Bitmap { + let mut bm = croaring::Bitmap::create(); + + let mut found = false; //self.values[0]; + let mut count = 0; + for (i, next) in self.values.iter().enumerate() { + if next.partial_cmp(wanted) != Some(order) && found { + let (min, max) = (i as u64 - count as u64, i as u64); + bm.add_range(min..max); + found = false; + count = 0; + continue; + } else if next.partial_cmp(wanted) != Some(order) { + continue; + } + + if !found { + found = true; + } + count += 1; + } + + // add any remaining range. 
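+ // NOTE(editor): e.g. scanning [1, 1, 4, 1] for Equal(1): the run of
+ // ones ending at index 2 emits add_range(0..2) inside the loop, the
+ // loop then exits with found == true and count == 1, and this tail
+ // step emits add_range(3..4), yielding row ids {0, 1, 3}.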
+ if found { + let (min, max) = ( + (self.values.len()) as u64 - count as u64, + (self.values.len()) as u64, + ); + bm.add_range(min..max); + } + bm + } } impl From<&[i64]> for PlainFixed { @@ -337,6 +371,45 @@ impl std::convert::From<&delorean_table::Packer> for #[cfg(test)] mod test { + #[test] + fn plain_row_ids_roaring_eq() { + let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; + let col = super::PlainFixed::from(input.as_slice()); + + let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![5, 6]); + + let bm = col.row_ids_roaring(&1, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![0, 1, 2, 3, 12]); + + let bm = col.row_ids_roaring(&6, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![8]); + + let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![7, 9, 10, 11, 13]); + + let bm = col.row_ids_roaring(&20, std::cmp::Ordering::Equal); + assert_eq!(bm.to_vec(), vec![]); + } + + #[test] + fn plain_row_ids_roaring_gt() { + let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; + let col = super::PlainFixed::from(input.as_slice()); + + let bm = col.row_ids_roaring(&0, std::cmp::Ordering::Greater); + let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Greater); + let exp: Vec = vec![7, 8, 9, 10, 11, 13]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Greater); + let exp: Vec = vec![8]; + assert_eq!(bm.to_vec(), exp); + } + #[test] fn dict_rle() { let mut drle = super::DictionaryRLE::new(); @@ -427,7 +500,7 @@ mod test { } #[test] - fn row_ids() { + fn rle_dict_row_ids() { let mut drle = super::DictionaryRLE::new(); drle.push_additional(Some("abc".to_string()), 3); drle.push_additional(Some("dre".to_string()), 2); @@ -451,7 +524,7 @@ mod test { } #[test] - fn row_ids_roaring() { + fn dict_rle_row_ids_roaring() { let mut drle = super::DictionaryRLE::new(); drle.push_additional(Some("abc".to_string()), 3); drle.push_additional(Some("dre".to_string()), 2); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 716392e5d1..0d815c2225 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -117,6 +117,61 @@ impl Segment { .collect::>>(), ) } + + pub fn filter_by_predicate_eq( + &self, + time_range: Option<(i64, i64)>, + predicates: Vec<(&str, Option<&column::Scalar>)>, + ) -> Option { + let mut bm = None; + if let Some((min, max)) = time_range { + if !self.meta.overlaps_time_range(min, max) { + return None; // segment doesn't have time range + } + + // TODO THIS COULD BE FASTER! 
+ + // find all timestamps row ids > min time + let rows_gt_min = + self.columns[self.time_column_idx].row_ids_gt(Some(&column::Scalar::Integer(min))); + // find all timestamps < max time + let rows_lt_max = + self.columns[self.time_column_idx].row_ids_lt(Some(&column::Scalar::Integer(max))); + + // Finally intersect matching timestamp rows + if rows_gt_min.is_none() && rows_lt_max.is_none() { + return None; + } else if rows_gt_min.is_none() { + bm = rows_lt_max; + } else if rows_lt_max.is_none() { + bm = rows_gt_min; + } else { + let mut rows = rows_gt_min.unwrap(); + rows.and_inplace(&rows_lt_max.unwrap()); + if rows.is_empty() { + return None; + } + bm = Some(rows); + } + } + + // now intersect matching rows for each column + let mut bm = bm.unwrap(); + for (col_pred_name, col_pred_value) in predicates { + if let Some(c) = self.column(col_pred_name) { + match c.row_ids_eq(col_pred_value) { + Some(row_ids) => { + bm.and_inplace(&row_ids); + if bm.is_empty() { + return None; + } + } + None => return None, // if this predicate doesn't match then no rows match + } + } + } + Some(bm) + } } /// Meta data for a segment. This data is mainly used to determine if a segment @@ -168,21 +223,29 @@ impl<'a> Segments<'a> { Self::new(segments) } - pub fn filter_by_predicate_eq( - &self, - column_name: &str, - value: &column::Scalar, - ) -> Segments<'a> { - let mut segments: Vec<&Segment> = vec![]; - for segment in &self.segments { - if let Some(col) = segment.column(column_name) { - if col.maybe_contains(&value) { - segments.push(segment); - } - } - } - Self::new(segments) - } + // pub fn filter_by_predicate_eq( + // &self, + // time_range: Option<(i64, i64)>, + // predicates: Vec<(&str, &column::Scalar)>, + // ) -> Option { + // let bm = None; + // for segment in self.segments { + // if let Some((min, max)) = time_range { + // if !segment.meta.overlaps_time_range(min, max) { + // continue; // segment doesn't have time range + // } + // } + + // // build set of + + // if let Some(col) = segment.column(column_name) { + // if col.maybe_contains(&value) { + // segments.push(segment); + // } + // } + // } + // Self::new(segments) + // } /// Returns the minimum value for a column in a set of segments. 
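The predicate loop above follows a short-circuiting pattern that recurs throughout this series: intersect each column's matching row ids into an accumulator and bail out the moment it becomes empty. A compact sketch of that accumulator, assuming the croaring crate already used by this patch (Bitmap::and_inplace, Bitmap::is_empty):

use croaring::Bitmap;

// Intersect per-predicate row-id sets into acc, returning None as soon
// as the result is provably empty. Each candidate plays the role of a
// column's row_ids_eq result; None means "this predicate matches no rows".
fn intersect_all(mut acc: Bitmap, candidates: Vec<Option<Bitmap>>) -> Option<Bitmap> {
    for candidate in candidates {
        match candidate {
            Some(row_ids) => {
                acc.and_inplace(&row_ids);
                if acc.is_empty() {
                    return None; // no row satisfies every predicate
                }
            }
            None => return None, // one empty predicate empties the whole result
        }
    }
    Some(acc)
}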
pub fn column_min(&self, column_name: &str) -> Option { From da9d3cd52803d038d2a413ecebcfcb5779b96c04 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 6 Aug 2020 18:41:56 +0100 Subject: [PATCH 14/73] feat: grouping and aggregate --- delorean_mem_qe/src/bin/main.rs | 24 +++++++++++++- delorean_mem_qe/src/column.rs | 29 +++++++++++++++++ delorean_mem_qe/src/encoding.rs | 58 ++++++++++++++++++++++++++++++--- delorean_mem_qe/src/segment.rs | 35 +++++++++++++++++--- 4 files changed, 137 insertions(+), 9 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 91f36dfcd5..bce6fe2384 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -84,7 +84,29 @@ fn main() { } println!("{:?}", rows.cardinality()); - time_row_by_preds(&store); + // time_row_by_preds(&store); + + let group_ids = segments + .segments() + .last() + .unwrap() + .group_by_column_ids("env") + .unwrap(); + + for (col_values, row_ids) in group_ids { + let (min, max) = segments.segments().last().unwrap().time_range(); + println!( + "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?}", + min, + max, + col_values, + segments + .segments() + .last() + .unwrap() + .sum_column(&"counter", &row_ids) + ); + } } fn build_store( diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index bf1ca53c8f..5163c9a5b2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -245,6 +245,24 @@ impl Column { } } + pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> Option { + match self { + Column::String(_) => unimplemented!("not implemented"), + Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), + Column::Integer(_) => unimplemented!("not implemented"), + } + } + + pub fn group_by_ids( + &self, + ) -> &std::collections::BTreeMap, croaring::Bitmap> { + match self { + Column::String(c) => c.data.group_row_ids(), + Column::Float(_) => unimplemented!("not implemented"), + Column::Integer(_) => unimplemented!("not implemented"), + } + } + // TODO(edd) shouldn't let roaring stuff leak out... pub fn row_ids_eq(&self, value: Option<&Scalar>) -> Option { if !self.maybe_contains(value) { @@ -353,6 +371,13 @@ impl String { unreachable!("don't need this"); // self.data.scan_from_until_some(row_id) } + + // TODO(edd) shouldn't let roaring stuff leak out... 
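The main.rs loop above is the grouped-aggregate strategy in miniature: each distinct value of the group column owns a bitmap of the rows it appears in, so grouping is a map lookup and aggregation is a filtered sum over those row ids. A self-contained sketch, assuming croaring and a plain f64 value column in place of the patch's Column types:

use croaring::Bitmap;
use std::collections::BTreeMap;

// Sum `values` for each group, restricted to rows that pass `filter`.
fn grouped_sums(
    groups: &BTreeMap<String, Bitmap>, // group value -> rows containing it
    filter: &Bitmap,                   // rows matching the time range/predicates
    values: &[f64],                    // the column being aggregated
) -> BTreeMap<String, f64> {
    let mut out = BTreeMap::new();
    for (group, row_ids) in groups {
        let matching = row_ids.and(filter); // rows in this group AND the filter
        if matching.is_empty() {
            continue; // group has no rows in the filtered set
        }
        let sum: f64 = matching.iter().map(|id| values[id as usize]).sum();
        out.insert(group.clone(), sum);
    }
    out
}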
+ pub fn group_row_ids( + &self, + ) -> &std::collections::BTreeMap, croaring::Bitmap> { + self.data.group_row_ids() + } } #[derive(Debug, Default)] @@ -383,6 +408,10 @@ impl Float { pub fn scan_from_until_some(&self, row_id: usize) -> Option { self.data.scan_from_until_some(row_id) } + + pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> f64 { + self.data.sum_by_ids(row_ids) + } } impl From<&[f64]> for Float { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index cb33423e2b..4b856f4dac 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -23,7 +23,7 @@ pub struct PlainFixed { impl PlainFixed where - T: PartialEq + PartialOrd + Copy + std::fmt::Debug, + T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, { pub fn size(&self) -> usize { self.values.len() * std::mem::size_of::() @@ -88,6 +88,17 @@ where } bm } + + // TODO(edd): make faster + pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> T { + let mut res = T::default(); + row_ids.iter().for_each(|x| res += self.value(x as usize)); + res + } + + pub fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + row_ids.cardinality() + } } impl From<&[i64]> for PlainFixed { @@ -113,6 +124,9 @@ pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. entry_index: BTreeMap, usize>, + // Experiment - store rows that each entry has a value for + entry_row_ids: BTreeMap, croaring::Bitmap>, + // stores the mapping between an index and its entry. index_entry: BTreeMap>, @@ -130,6 +144,7 @@ impl DictionaryRLE { pub fn new() -> Self { Self { entry_index: BTreeMap::new(), + entry_row_ids: BTreeMap::new(), index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), @@ -147,7 +162,6 @@ impl DictionaryRLE { } pub fn push_additional(&mut self, v: Option, additional: u64) { - self.total += additional; let idx = self.entry_index.get(&v); match idx { Some(idx) => { @@ -160,6 +174,10 @@ impl DictionaryRLE { self.run_lengths.push((*idx, additional)); self.run_length_size += std::mem::size_of::<(usize, u64)>(); } + self.entry_row_ids + .get_mut(&v) + .unwrap() + .add_range(self.total..self.total + additional); } } None => { @@ -168,18 +186,24 @@ impl DictionaryRLE { let idx = self.entry_index.len(); self.entry_index.insert(v.clone(), idx); + self.entry_row_ids + .insert(v.clone(), croaring::Bitmap::create()); if let Some(value) = &v { self.map_size += value.len(); } - self.index_entry.insert(idx, v); + self.index_entry.insert(idx, v.clone()); self.map_size += 8 + std::mem::size_of::(); // TODO(edd): clean this option size up self.run_lengths.push((idx, additional)); + self.entry_row_ids + .get_mut(&v) + .unwrap() + .add_range(self.total..self.total + additional); self.run_length_size += std::mem::size_of::<(usize, u64)>(); - return; } } } + self.total += additional; } // row_ids returns an iterator over the set of row ids matching the provided @@ -216,6 +240,11 @@ impl DictionaryRLE { bm } + // get the set of row ids for each distinct value + pub fn group_row_ids(&self) -> &BTreeMap, croaring::Bitmap> { + &self.entry_row_ids + } + // row_ids returns an iterator over the set of row ids matching the provided // value // pub fn row_ids(&'a self, value: &str) -> impl iter::Iterator { @@ -457,6 +486,27 @@ mod test { assert_eq!(drle.value(6).unwrap(), "zoo"); assert_eq!(drle.value(7).unwrap(), "zoo"); assert_eq!(drle.value(8).unwrap(), "zoo"); + + let row_ids = drle + .entry_row_ids + .get(&Some("hello".to_string())) + 
.unwrap() + .to_vec(); + assert_eq!(row_ids, vec![0, 1, 3, 4, 5]); + + let row_ids = drle + .entry_row_ids + .get(&Some("world".to_string())) + .unwrap() + .to_vec(); + assert_eq!(row_ids, vec![2]); + + let row_ids = drle + .entry_row_ids + .get(&Some("zoo".to_string())) + .unwrap() + .to_vec(); + assert_eq!(row_ids, vec![6, 7, 8]); } #[test] diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 0d815c2225..cb6e934535 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -118,6 +118,23 @@ impl Segment { ) } + pub fn group_by_column_ids( + &self, + name: &str, + ) -> Option<&std::collections::BTreeMap, croaring::Bitmap>> { + if let Some(c) = self.column(name) { + return Some(c.group_by_ids()); + } + None + } + + pub fn sum_column(&self, name: &str, row_ids: &croaring::Bitmap) -> Option { + if let Some(c) = self.column(name) { + return c.sum_by_ids(row_ids); + } + None + } + pub fn filter_by_predicate_eq( &self, time_range: Option<(i64, i64)>, @@ -156,21 +173,31 @@ impl Segment { } // now intersect matching rows for each column - let mut bm = bm.unwrap(); + // let mut bm = bm.unwrap(); for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { match c.row_ids_eq(col_pred_value) { Some(row_ids) => { - bm.and_inplace(&row_ids); - if bm.is_empty() { + if row_ids.is_empty() { return None; } + + match &mut bm { + Some(all) => { + all.and_inplace(&row_ids); + if all.is_empty() { + // no rows intersect + return None; + } + } + None => bm = Some(row_ids), + } } None => return None, // if this predicate doesn't match then no rows match } } } - Some(bm) + bm } } From 759254b381d6259364c700bdb3986866ff8b2c0e Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 10 Aug 2020 15:08:45 +0100 Subject: [PATCH 15/73] feat: add support for time >= x and time < y --- delorean_mem_qe/src/bin/main.rs | 99 ++++++++++---------- delorean_mem_qe/src/column.rs | 91 ++++++++++++++++--- delorean_mem_qe/src/encoding.rs | 155 ++++++++++++++++++++++++++++---- delorean_mem_qe/src/segment.rs | 141 +++++++++++++++++++---------- 4 files changed, 362 insertions(+), 124 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index bce6fe2384..d553c72451 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -58,54 +58,61 @@ fn main() { // time_row_by_last_ts(&store); - let rows = segments - .segments() - .last() - .unwrap() - .filter_by_predicate_eq( - Some((1590040770000000, 1590040790000000)), - vec![ - ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - ("method", Some(&column::Scalar::String("GET"))), - ( - "host", - Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - ), - ], - ) - .unwrap(); + // let rows = segments + // .segments() + // .last() + // .unwrap() + // .filter_by_predicate_eq( + // Some((1590040770000000, 1590040790000000)), + // vec![ + // ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + // ("method", Some(&column::Scalar::String("GET"))), + // ( + // "host", + // Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + // ), + // ], + // ) + // .unwrap(); - for row_id in rows.iter() { - println!( - "{:?} - {:?}", - row_id, - segments.segments().last().unwrap().row(row_id as usize) - ); - } - println!("{:?}", rows.cardinality()); + // for row_id in rows.iter() { + // println!( + // "{:?} - {:?}", + // row_id, + // segments.segments().last().unwrap().row(row_id as usize) + // ); + // } + // println!("{:?}", 
rows.cardinality()); // time_row_by_preds(&store); + loop { + let mut total_count = 0.0; + let now = std::time::Instant::now(); + for segment in segments.segments() { + let (min, max) = segment.time_range(); + let time_ids = segment.filter_by_predicates_eq((min, max), vec![]).unwrap(); - let group_ids = segments - .segments() - .last() - .unwrap() - .group_by_column_ids("env") - .unwrap(); - - for (col_values, row_ids) in group_ids { - let (min, max) = segments.segments().last().unwrap().time_range(); - println!( - "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?}", - min, - max, - col_values, - segments - .segments() - .last() - .unwrap() - .sum_column(&"counter", &row_ids) - ); + let group_ids = segment.group_by_column_ids("env").unwrap(); + for (col_values, row_ids) in group_ids { + // filter ids by time + let mut result = row_ids.and(&time_ids); + // let + // println!( + // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", + // min, + // max, + // col_values, + // segment.sum_column(&"counter", &result), + // result.cardinality(), + // ); + if let column::Scalar::Float(x) = + segment.sum_column(&"counter", &mut result).unwrap() + { + total_count += x; + } + } + } + println!("Done ({:?}) in {:?}", total_count, now.elapsed()); } } @@ -298,8 +305,8 @@ fn time_row_by_preds(store: &Store) { .segments() .last() .unwrap() - .filter_by_predicate_eq( - Some((1590040770000000, 1590040790000000)), + .filter_by_predicates_eq( + (1590040770000000, 1590040790000000), vec![ ("env", Some(&column::Scalar::String("prod01-us-west-2"))), ("method", Some(&column::Scalar::String("GET"))), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 5163c9a5b2..5b75e49d7f 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -245,7 +245,7 @@ impl Column { } } - pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> Option { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), @@ -285,6 +285,66 @@ impl Column { self.row_ids(value, std::cmp::Ordering::Less) } + // allows you to do: + // WHERE time >= 0 AND time < 100 + // + // or + // + // WHERE counter >= 102.2 AND counter < 2929.32 + pub fn row_ids_gte_lt(&self, low: &Scalar, high: &Scalar) -> Option { + match self { + Column::String(c) => { + unimplemented!("not implemented yet"); + } + Column::Float(c) => { + let (col_min, col_max) = c.meta.range(); + if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { + if *low >= col_min && *high < col_max { + // In this case the column completely covers the range. + // TODO: PERF - need to _not_ return a bitset rather than + // return a full one. Need to differentiate between "no values" + // and "all values" in the context of an Option. Right now + // None means "no values" + // + let mut bm = croaring::Bitmap::create(); + bm.add_range(0..c.meta.num_rows() as u64); // all rows + return Some(bm); + } + + // The column has some values that are outside of the + // desired range so we need to determine the set of matching + // row ids. + Some(c.data.row_ids_gte_lt_roaring(low, high)) + } else { + panic!("not supposed to be here"); + } + } + Column::Integer(c) => { + let (col_min, col_max) = c.meta.range(); + if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { + if *low >= col_min && *high < col_max { + // In this case the column completely covers the range. 
+ // TODO: PERF - need to _not_ return a bitset rather than + // return a full one. Need to differentiate between "no values" + // and "all values" in the context of an Option. Right now + // None means "no values" + // + let mut bm = croaring::Bitmap::create(); + bm.add_range(0..c.meta.num_rows() as u64); // all rows + return Some(bm); + } + + // The column has some values that are outside of the + // desired range so we need to determine the set of matching + // row ids. + Some(c.data.row_ids_gte_lt_roaring(low, high)) + } else { + panic!("not supposed to be here"); + } + } + } + } + // TODO(edd) shouldn't let roaring stuff leak out... fn row_ids( &self, @@ -292,26 +352,31 @@ impl Column { order: std::cmp::Ordering, ) -> Option { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - Some(c.data.row_ids_roaring(Some(v.to_string()))) - } else { - panic!("invalid value"); - } + Column::String(c) => { + if order != std::cmp::Ordering::Equal { + unimplemented!("> < not supported on strings yet"); } - None => Some(c.data.row_ids_roaring(None)), - }, + match value { + Some(scalar) => { + if let Scalar::String(v) = scalar { + Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) + } else { + panic!("invalid value"); + } + } + None => Some(c.data.row_ids_eq_roaring(None)), + } + } Column::Float(c) => { if let Some(Scalar::Float(v)) = value { - Some(c.data.row_ids_roaring(v, order)) + Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { if let Some(Scalar::Integer(v)) = value { - Some(c.data.row_ids_roaring(v, order)) + Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } @@ -409,7 +474,7 @@ impl Float { self.data.scan_from_until_some(row_id) } - pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> f64 { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { self.data.sum_by_ids(row_ids) } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 4b856f4dac..a575e240a0 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -18,6 +18,7 @@ impl PlainFixedOption { // No compression pub struct PlainFixed { values: Vec, + buf: Vec, total_order: bool, // if true the column is totally ordered ascending. } @@ -55,8 +56,14 @@ where &self.values[row_id..] } - /// returns a set of row ids that match an ordering on a desired value - pub fn row_ids_roaring(&self, wanted: &T, order: std::cmp::Ordering) -> croaring::Bitmap { + /// returns a set of row ids that match a single ordering on a desired value + /// + /// This supports `value = x` , `value < x` or `value > x`. + pub fn row_ids_single_cmp_roaring( + &self, + wanted: &T, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); let mut found = false; //self.values[0]; @@ -89,10 +96,100 @@ where bm } + /// returns a set of row ids that match the half open interval `[from, to)`. + /// + /// The main use-case for this is time range filtering. 
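One detail worth flagging in the fast path above: for "the column is completely covered" to hold, the query interval must contain the column's range, i.e. low <= col_min and high > col_max; patch 16 later in this series flips the comparisons accordingly. A sketch of the corrected fast path over an i64 column, assuming croaring's Bitmap::create/add/add_range:

use croaring::Bitmap;

// Row ids whose values fall in the half-open interval [low, high).
fn row_ids_gte_lt(values: &[i64], col_min: i64, col_max: i64, low: i64, high: i64) -> Bitmap {
    let mut bm = Bitmap::create();
    // Fast path: the query interval contains the column's entire range,
    // so every row matches and no scan is needed.
    if low <= col_min && high > col_max {
        bm.add_range(0..values.len() as u64);
        return bm;
    }
    // Slow path: scan and collect matching rows (the patch batches runs
    // of matches into add_range; one id at a time is shown for brevity).
    for (i, v) in values.iter().enumerate() {
        if *v >= low && *v < high {
            bm.add(i as u32);
        }
    }
    bm
}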
+ pub fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap { + let mut bm = croaring::Bitmap::create(); + + let mut found = false; //self.values[0]; + let mut count = 0; + for (i, next) in self.values.iter().enumerate() { + if (next < from || next >= to) && found { + let (min, max) = (i as u64 - count as u64, i as u64); + bm.add_range(min..max); + found = false; + count = 0; + continue; + } else if next < from || next >= to { + continue; + } + + if !found { + found = true; + } + count += 1; + } + + // add any remaining range. + if found { + let (min, max) = ( + (self.values.len()) as u64 - count as u64, + (self.values.len()) as u64, + ); + bm.add_range(min..max); + } + bm + } + // TODO(edd): make faster - pub fn sum_by_ids(&self, row_ids: &croaring::Bitmap) -> T { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { let mut res = T::default(); - row_ids.iter().for_each(|x| res += self.value(x as usize)); + // println!( + // "cardinality is {:?} out of {:?}", + // row_ids.cardinality(), + // self.values.len() + // ); + + // HMMMMM - materialising which has a memory cost. + // let vec = row_ids.to_vec(); + // for v in vec.chunks_exact(4) { + // res += self.value(v[0] as usize); + // res += self.value(v[1] as usize); + // res += self.value(v[2] as usize); + // res += self.value(v[3] as usize); + // } + + // HMMMMM - materialising which has a memory cost. + let vec = row_ids.to_vec(); + for v in vec { + res += self.value(v as usize); + } + + // for v in row_ids.iter() { + // res += self.value(v as usize); + // } + + // let step = 16_u64; + // for i in (0..self.values.len() as u64).step_by(step as usize) { + // if row_ids.contains_range(i..i + step) { + // res += self.value(i as usize + 15); + // res += self.value(i as usize + 14); + // res += self.value(i as usize + 13); + // res += self.value(i as usize + 12); + // res += self.value(i as usize + 11); + // res += self.value(i as usize + 10); + // res += self.value(i as usize + 9); + // res += self.value(i as usize + 8); + // res += self.value(i as usize + 7); + // res += self.value(i as usize + 6); + // res += self.value(i as usize + 5); + // res += self.value(i as usize + 4); + // res += self.value(i as usize + 3); + // res += self.value(i as usize + 2); + // res += self.value(i as usize + 1); + // res += self.value(i as usize); + // continue; + // } + + // for j in i..i + step { + // if row_ids.contains(j as u32) { + // res += self.value(j as usize); + // } + // } + // } + + // row_ids.iter().for_each(|x| res += self.value(x as usize)); res } @@ -105,6 +202,7 @@ impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { Self { values: v.to_vec(), + buf: Vec::with_capacity(v.len()), total_order: false, } } @@ -114,6 +212,7 @@ impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { Self { values: v.to_vec(), + buf: Vec::with_capacity(v.len()), total_order: false, } } @@ -225,7 +324,7 @@ impl DictionaryRLE { // row_ids returns an iterator over the set of row ids matching the provided // value. 
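The commented-out experiments above all probe the same trade-off: summing through row_ids.iter() avoids an allocation but pays per-element decode cost, while materialising with to_vec() allocates once and then reads a flat buffer. Both variants in sketch form, assuming croaring:

use croaring::Bitmap;

// Iterator-based sum: no allocation, one bitmap decode per element.
fn sum_iter(values: &[f64], row_ids: &Bitmap) -> f64 {
    row_ids.iter().map(|id| values[id as usize]).sum()
}

// Materialised sum: one Vec<u32> allocation, then a tight indexed loop.
fn sum_materialised(values: &[f64], row_ids: &Bitmap) -> f64 {
    let ids = row_ids.to_vec();
    let mut sum = 0.0;
    for id in ids {
        sum += values[id as usize];
    }
    sum
}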
- pub fn row_ids_roaring(&self, value: Option) -> croaring::Bitmap { + pub fn row_ids_eq_roaring(&self, value: Option) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); if let Some(idx) = self.entry_index.get(&value) { let mut index: u64 = 0; @@ -405,40 +504,62 @@ mod test { let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; let col = super::PlainFixed::from(input.as_slice()); - let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&4, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![5, 6]); - let bm = col.row_ids_roaring(&1, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&1, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![0, 1, 2, 3, 12]); - let bm = col.row_ids_roaring(&6, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&6, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![8]); - let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&5, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![7, 9, 10, 11, 13]); - let bm = col.row_ids_roaring(&20, std::cmp::Ordering::Equal); + let bm = col.row_ids_single_cmp_roaring(&20, std::cmp::Ordering::Equal); assert_eq!(bm.to_vec(), vec![]); } #[test] - fn plain_row_ids_roaring_gt() { + fn plain_row_ids_cmp_roaring_gt() { let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; let col = super::PlainFixed::from(input.as_slice()); - let bm = col.row_ids_roaring(&0, std::cmp::Ordering::Greater); + let bm = col.row_ids_single_cmp_roaring(&0, std::cmp::Ordering::Greater); let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; assert_eq!(bm.to_vec(), exp); - let bm = col.row_ids_roaring(&4, std::cmp::Ordering::Greater); + let bm = col.row_ids_single_cmp_roaring(&4, std::cmp::Ordering::Greater); let exp: Vec = vec![7, 8, 9, 10, 11, 13]; assert_eq!(bm.to_vec(), exp); - let bm = col.row_ids_roaring(&5, std::cmp::Ordering::Greater); + let bm = col.row_ids_single_cmp_roaring(&5, std::cmp::Ordering::Greater); let exp: Vec = vec![8]; assert_eq!(bm.to_vec(), exp); } + #[test] + fn plain_row_ids_gte_lt_roaring() { + let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; + let col = super::PlainFixed::from(input.as_slice()); + + let bm = col.row_ids_gte_lt_roaring(&-1, &7); + let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_gte_lt_roaring(&1, &5); + let exp: Vec = vec![0, 1, 2, 3, 4, 5, 6, 12]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_gte_lt_roaring(&0, &1); + let exp: Vec = vec![]; + assert_eq!(bm.to_vec(), exp); + + let bm = col.row_ids_gte_lt_roaring(&1, &2); + let exp: Vec = vec![0, 1, 2, 3, 12]; + assert_eq!(bm.to_vec(), exp); + } + #[test] fn dict_rle() { let mut drle = super::DictionaryRLE::new(); @@ -581,19 +702,19 @@ mod test { drle.push("abc"); let ids = drle - .row_ids_roaring(Some("abc".to_string())) + .row_ids_eq_roaring(Some("abc".to_string())) .iter() .collect::>(); assert_eq!(ids, vec![0, 1, 2, 5]); let ids = drle - .row_ids_roaring(Some("dre".to_string())) + .row_ids_eq_roaring(Some("dre".to_string())) .iter() .collect::>(); assert_eq!(ids, vec![3, 4]); let ids = drle - .row_ids_roaring(Some("foo".to_string())) + .row_ids_eq_roaring(Some("foo".to_string())) .iter() .collect::>(); let empty: Vec = vec![]; diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index cb6e934535..aad3153c95 100644 --- a/delorean_mem_qe/src/segment.rs 
+++ b/delorean_mem_qe/src/segment.rs @@ -14,10 +14,8 @@ pub struct Segment { impl Segment { pub fn new(rows: usize) -> Self { - let mut meta = SegmentMetaData::default(); - meta.rows = rows; Self { - meta, + meta: SegmentMetaData::new(rows), columns: vec![], time_column_idx: 0, } @@ -128,52 +126,46 @@ impl Segment { None } - pub fn sum_column(&self, name: &str, row_ids: &croaring::Bitmap) -> Option { + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); } None } - pub fn filter_by_predicate_eq( + pub fn filter_by_predicates_eq( &self, - time_range: Option<(i64, i64)>, + time_range: (i64, i64), predicates: Vec<(&str, Option<&column::Scalar>)>, ) -> Option { - let mut bm = None; - if let Some((min, max)) = time_range { - if !self.meta.overlaps_time_range(min, max) { - return None; // segment doesn't have time range - } - - // TODO THIS COULD BE FASTER! - - // find all timestamps row ids > min time - let rows_gt_min = - self.columns[self.time_column_idx].row_ids_gt(Some(&column::Scalar::Integer(min))); - // find all timestamps < max time - let rows_lt_max = - self.columns[self.time_column_idx].row_ids_lt(Some(&column::Scalar::Integer(max))); - - // Finally intersect matching timestamp rows - if rows_gt_min.is_none() && rows_lt_max.is_none() { - return None; - } else if rows_gt_min.is_none() { - bm = rows_lt_max; - } else if rows_lt_max.is_none() { - bm = rows_gt_min; - } else { - let mut rows = rows_gt_min.unwrap(); - rows.and_inplace(&rows_lt_max.unwrap()); - if rows.is_empty() { - return None; - } - bm = Some(rows); - } + if !self.meta.overlaps_time_range(time_range.0, time_range.1) { + return None; // segment doesn't have time range } + let (seg_min, seg_max) = self.meta.time_range; + if seg_min <= time_range.0 && seg_max >= time_range.1 { + // the segment completely overlaps the time range of query so don't + // need to intersect with time column. + return self.filter_by_predicates_eq_no_time(predicates); + } + + self.filter_by_predicates_eq_time(time_range, predicates) + } + + fn filter_by_predicates_eq_time( + &self, + time_range: (i64, i64), + predicates: Vec<(&str, Option<&column::Scalar>)>, + ) -> Option { + // Get all row_ids matching the time range: + // + // time > time_range.0 AND time < time_range.1 + let mut bm = self.columns[self.time_column_idx].row_ids_gte_lt( + &column::Scalar::Integer(time_range.0), + &column::Scalar::Integer(time_range.1), + )?; + // now intersect matching rows for each column - // let mut bm = bm.unwrap(); for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { match c.row_ids_eq(col_pred_value) { @@ -182,38 +174,91 @@ impl Segment { return None; } - match &mut bm { - Some(all) => { - all.and_inplace(&row_ids); - if all.is_empty() { - // no rows intersect - return None; - } - } - None => bm = Some(row_ids), + bm.and_inplace(&row_ids); + if bm.is_empty() { + return None; } } None => return None, // if this predicate doesn't match then no rows match } } } - bm + Some(bm) + } + + // in this case the complete time range of segment covered so no need to intersect + // on time. + // + // We return an &Option here because we don't want to move the read-only + // meta row_ids bitmap. 
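The refactor above splits time filtering into three cases decided purely from segment metadata: no overlap (prune the segment), full coverage (skip the time column entirely), and partial overlap (intersect predicates with time-column row ids). A sketch of that dispatch, with hypothetical names standing in for the patch's Segment internals:

struct SegmentMeta {
    time_range: (i64, i64), // (min, max) timestamp held by the segment
}

enum TimeFilter {
    Pruned,       // segment and query ranges don't overlap at all
    WholeSegment, // query covers the segment; the time column is never read
    Partial,      // must intersect predicates with time-column row ids
}

fn classify(meta: &SegmentMeta, query: (i64, i64)) -> TimeFilter {
    let (seg_min, seg_max) = meta.time_range;
    if seg_min > query.1 || seg_max < query.0 {
        return TimeFilter::Pruned;
    }
    if query.0 <= seg_min && query.1 > seg_max {
        return TimeFilter::WholeSegment;
    }
    TimeFilter::Partial
}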
+ fn filter_by_predicates_eq_no_time( + &self, + predicates: Vec<(&str, Option<&column::Scalar>)>, + ) -> Option { + let mut bm: Option = None; + // now intersect matching rows for each column + for (col_pred_name, col_pred_value) in predicates { + if let Some(c) = self.column(col_pred_name) { + match c.row_ids_eq(col_pred_value) { + Some(row_ids) => { + if row_ids.is_empty() { + return None; + } + + if let Some(bm) = &mut bm { + bm.and_inplace(&row_ids); + if bm.is_empty() { + return None; + } + } else { + bm = Some(row_ids); + } + } + None => { + return None; + } // if this predicate doesn't match then no rows match + } + } else { + return None; // column doesn't exist - no matching rows + } + } + + // In this case there are no predicates provided and we have no time + // range restrictions - we need to return a bitset for all row ids. + let mut bm = croaring::Bitmap::create_with_capacity(self.num_rows() as u32); + bm.add_range(0..self.num_rows() as u64); + Some(bm) } } /// Meta data for a segment. This data is mainly used to determine if a segment /// may contain value for answering a query. -#[derive(Debug, Default)] +#[derive(Debug)] pub struct SegmentMetaData { size: usize, // TODO rows: usize, column_names: Vec, time_range: (i64, i64), + + // row_ids is a bitmap containing all row ids. + row_ids: croaring::Bitmap, // TODO column sort order } impl SegmentMetaData { + pub fn new(rows: usize) -> Self { + let mut meta = Self { + size: 0, + rows, + column_names: vec![], + time_range: (0, 0), + row_ids: croaring::Bitmap::create_with_capacity(rows as u32), + }; + meta.row_ids.add_range(0..rows as u64); + meta + } + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { self.time_range.0 <= to && from <= self.time_range.1 } From 3fef4ff1106c1401b1e64416c304b9dc4a992f0f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 11 Aug 2020 11:31:35 +0100 Subject: [PATCH 16/73] feat: add equivalent of ReadFilter --- delorean_mem_qe/src/bin/main.rs | 131 ++++++++++++++++++------------ delorean_mem_qe/src/column.rs | 115 ++++++++++++++++++++------ delorean_mem_qe/src/encoding.rs | 140 +++++++++++++++++++++++--------- delorean_mem_qe/src/segment.rs | 121 +++++++++++++++++---------- 4 files changed, 348 insertions(+), 159 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d553c72451..f02b246ad3 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -39,9 +39,9 @@ fn main() { // time_column_min_time(&store); // time_column_max_time(&store); // time_column_first(&store); - let segments = store.segments(); - let res = segments.last("host").unwrap(); - println!("{:?}", res); + // let segments = store.segments(); + // let res = segments.last("host").unwrap(); + // println!("{:?}", res); // let segments = segments // .filter_by_time(1590036110000000, 1590044410000000) @@ -85,35 +85,64 @@ fn main() { // println!("{:?}", rows.cardinality()); // time_row_by_preds(&store); - loop { - let mut total_count = 0.0; - let now = std::time::Instant::now(); - for segment in segments.segments() { - let (min, max) = segment.time_range(); - let time_ids = segment.filter_by_predicates_eq((min, max), vec![]).unwrap(); - let group_ids = segment.group_by_column_ids("env").unwrap(); - for (col_values, row_ids) in group_ids { - // filter ids by time - let mut result = row_ids.and(&time_ids); - // let - // println!( - // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", - // min, - // max, - // col_values, - // 
segment.sum_column(&"counter", &result), - // result.cardinality(), - // ); - if let column::Scalar::Float(x) = - segment.sum_column(&"counter", &mut result).unwrap() - { - total_count += x; - } - } - } - println!("Done ({:?}) in {:?}", total_count, now.elapsed()); + let segments = store.segments(); + let columns = segments.read_filter_eq( + (1590040770000000, 1590044410000000), + &[ + ("env", Some(&column::Scalar::String("prod01-us-west-2"))), + ("method", Some(&column::Scalar::String("GET"))), + ( + "host", + Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), + ), + ], + vec![ + "env".to_string(), + "method".to_string(), + "host".to_string(), + "counter".to_string(), + "time".to_string(), + ], + ); + + for (k, v) in columns { + println!("COLUMN {:?}", k); + println!("ROWS ({:?}) {:?}", v.len(), 0); + // println!("ROWS ({:?}) {:?}", v, v.len()); } + + // loop { + // let mut total_count = 0.0; + // let now = std::time::Instant::now(); + // for segment in segments.segments() { + // let (min, max) = segment.time_range(); + // let time_ids = segment + // .filter_by_predicates_eq((min, max), &vec![]) + // .unwrap(); + + // let group_ids = segment.group_by_column_ids("env").unwrap(); + // for (col_values, row_ids) in group_ids { + // // filter ids by time + // let mut result = row_ids.and(&time_ids); + // // let + // // println!( + // // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", + // // min, + // // max, + // // col_values, + // // segment.sum_column(&"counter", &result), + // // result.cardinality(), + // // ); + // if let column::Scalar::Float(x) = + // segment.sum_column(&"counter", &mut result).unwrap() + // { + // total_count += x; + // } + // } + // } + // println!("Done ({:?}) in {:?}", total_count, now.elapsed()); + // } } fn build_store( @@ -271,27 +300,27 @@ fn time_column_first(store: &Store) { ); } -fn time_row_by_last_ts(store: &Store) { - let repeat = 100000; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; - let segments = store.segments(); - for _ in 0..repeat { - let now = std::time::Instant::now(); +// fn time_row_by_last_ts(store: &Store) { +// let repeat = 100000; +// let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); +// let mut total_max = 0; +// let segments = store.segments(); +// for _ in 0..repeat { +// let now = std::time::Instant::now(); - let (_, _, row_id) = segments.last("time").unwrap(); - let res = segments.segments().last().unwrap().row(row_id).unwrap(); - total_time += now.elapsed(); - total_max += res.len(); - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); -} +// let (_, _, row_id) = segments.last("time").unwrap(); +// let res = segments.segments().last().unwrap().row(row_id).unwrap(); +// total_time += now.elapsed(); +// total_max += res.len(); +// } +// println!( +// "Ran {:?} in {:?} {:?} / call {:?}", +// repeat, +// total_time, +// total_time / repeat, +// total_max +// ); +// } fn time_row_by_preds(store: &Store) { let repeat = 100000; @@ -307,7 +336,7 @@ fn time_row_by_preds(store: &Store) { .unwrap() .filter_by_predicates_eq( (1590040770000000, 1590040790000000), - vec![ + &vec![ ("env", Some(&column::Scalar::String("prod01-us-west-2"))), ("method", Some(&column::Scalar::String("GET"))), ( diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 5b75e49d7f..be0809cbb6 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -12,8 
+12,47 @@ pub enum Scalar<'a> { #[derive(Debug)] pub enum Vector<'a> { String(Vec<&'a Option>), - Float(&'a [f64]), - Integer(&'a [i64]), + Float(Vec<&'a f64>), + Integer(Vec<&'a i64>), +} + +impl<'a> Vector<'a> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> usize { + match self { + Self::String(v) => v.len(), + Self::Float(v) => v.len(), + Self::Integer(v) => v.len(), + } + } + pub fn extend(&mut self, other: Self) { + match self { + Self::String(v) => { + if let Self::String(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Float(v) => { + if let Self::Float(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Integer(v) => { + if let Self::Integer(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + } + } } #[derive(Debug)] @@ -69,29 +108,45 @@ impl Column { } } - /// materialise all rows including and after row_id - pub fn scan_from(&self, row_id: usize) -> Option { - if row_id >= self.num_rows() { - println!( - "asking for {:?} but only got {:?} rows", - row_id, - self.num_rows() - ); - return None; - } - - println!( - "asking for {:?} with a column having {:?} rows", - row_id, - self.num_rows() + /// materialise rows for each row_id + pub fn rows(&self, row_ids: &[usize]) -> Vector { + assert!( + row_ids.len() == 1 || row_ids[row_ids.len() - 1] > row_ids[0], + "got last row_id={:?} and first row_id={:?}", + row_ids[row_ids.len() - 1], + row_ids[0] ); match self { - Column::String(c) => Some(Vector::String(c.scan_from(row_id))), - Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), - Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + Column::String(c) => Vector::String(c.values(row_ids)), + Column::Float(c) => Vector::Float(c.values(row_ids)), + Column::Integer(c) => Vector::Integer(c.values(row_ids)), } } + /// materialise all rows including and after row_id + pub fn scan_from(&self, row_id: usize) -> Option { + unimplemented!("todo"); + // if row_id >= self.num_rows() { + // println!( + // "asking for {:?} but only got {:?} rows", + // row_id, + // self.num_rows() + // ); + // return None; + // } + + // println!( + // "asking for {:?} with a column having {:?} rows", + // row_id, + // self.num_rows() + // ); + // match self { + // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), + // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), + // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + // } + } + /// Given the provided row_id scans the column until a non-null value found /// or the column is exhausted. pub fn scan_from_until_some(&self, row_id: usize) -> Option { @@ -299,8 +354,8 @@ impl Column { Column::Float(c) => { let (col_min, col_max) = c.meta.range(); if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { - if *low >= col_min && *high < col_max { - // In this case the column completely covers the range. + if *low <= col_min && *high > col_max { + // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" // and "all values" in the context of an Option. 
Right now @@ -322,8 +377,8 @@ impl Column { Column::Integer(c) => { let (col_min, col_max) = c.meta.range(); if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { - if *low >= col_min && *high < col_max { - // In this case the column completely covers the range. + if *low <= col_min && *high > col_max { + // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" // and "all values" in the context of an Option. Right now @@ -428,6 +483,10 @@ impl String { self.data.value(row_id) } + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + self.data.values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { self.data.scan_from(row_id) } @@ -466,6 +525,10 @@ impl Float { self.data.value(row_id) } + pub fn values(&self, row_ids: &[usize]) -> Vec<&f64> { + self.data.values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[f64] { self.data.scan_from(row_id) } @@ -519,6 +582,10 @@ impl Integer { self.data.value(row_id) } + pub fn values(&self, row_ids: &[usize]) -> Vec<&i64> { + self.data.values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[i64] { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index a575e240a0..c2d0052f3e 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -43,6 +43,14 @@ where self.values[row_id] } + pub fn values(&self, row_ids: &[usize]) -> Vec<&T> { + let mut out = Vec::with_capacity(row_ids.len()); + for row_id in row_ids { + out.push(&self.values[*row_id]); + } + out + } + // TODO(edd): fix this when added NULL support pub fn scan_from_until_some(&self, row_id: usize) -> Option { unreachable!("to remove"); @@ -392,6 +400,61 @@ impl DictionaryRLE { None } + // materialises a vector of references to logical values in the + // encoding for each provided row_id. + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + let mut out: Vec<&Option> = Vec::with_capacity(row_ids.len()); + + let mut curr_logical_row_id = 0; + + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_entry_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + for wanted_row_id in row_ids { + while curr_logical_row_id + curr_entry_rl <= *wanted_row_id as u64 { + // this encoded entry does not cover the row we need. + // move on to next entry + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_entry_id = res.0; + curr_entry_rl = res.1; + } + None => panic!("shouldn't get here"), + } + } + + // this encoded entry covers the row_id we want. + let value = self.index_entry.get(&curr_entry_id).unwrap(); + out.push(value); + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + assert_eq!(row_ids.len(), out.len()); + out + } + + // values materialises a vector of references to all logical values in the + // encoding. + pub fn all_values(&mut self) -> Vec> { + let mut out: Vec> = Vec::with_capacity(self.total as usize); + + // build reverse mapping. + let mut idx_value = BTreeMap::new(); + for (k, v) in &self.entry_index { + idx_value.insert(v, k); + } + assert_eq!(idx_value.len(), self.entry_index.len()); + + for (idx, rl) in &self.run_lengths { + // TODO(edd): fix unwrap - we know that the value exists in map... 
+ let v = idx_value.get(&idx).unwrap().as_ref(); + out.extend(iter::repeat(v).take(*rl as usize)); + } + out + } + // materialise a slice of rows starting from index. pub fn scan_from(&self, index: usize) -> Vec<&Option> { let mut result = vec![]; @@ -428,44 +491,6 @@ impl DictionaryRLE { result } - // // get the logical value at the provided index, or scan to the next value - // // that is non-null. - // pub fn scan_from_until_some(&self, index: usize) -> Option<&String> { - // if index < self.total as usize { - // let mut total = 0; - // for (idx, rl) in &self.run_lengths { - // if total + rl > index as u64 { - // // If there is a value then return otherwise continue. - // if let Some(v) = self.index_entry.get(idx) { - // return v.as_ref(); - // } - // } - // total += rl; - // } - // } - // None - // } - - // values materialises a vector of references to all logical values in the - // encoding. - pub fn values(&mut self) -> Vec> { - let mut out: Vec> = Vec::with_capacity(self.total as usize); - - // build reverse mapping. - let mut idx_value = BTreeMap::new(); - for (k, v) in &self.entry_index { - idx_value.insert(v, k); - } - assert_eq!(idx_value.len(), self.entry_index.len()); - - for (idx, rl) in &self.run_lengths { - // TODO(edd): fix unwrap - we know that the value exists in map... - let v = idx_value.get(&idx).unwrap().as_ref(); - out.extend(iter::repeat(v).take(*rl as usize)); - } - out - } - pub fn size(&self) -> usize { // mapping and reverse mapping then the rles 2 * self.map_size + self.run_length_size @@ -571,7 +596,7 @@ mod test { drle.push_additional(Some("hello".to_string()), 1); assert_eq!( - drle.values(), + drle.all_values(), [ Some(&"hello".to_string()), Some(&"hello".to_string()), @@ -584,7 +609,7 @@ mod test { drle.push_additional(Some("zoo".to_string()), 3); assert_eq!( - drle.values(), + drle.all_values(), [ Some(&"hello".to_string()), Some(&"hello".to_string()), @@ -670,6 +695,41 @@ mod test { assert_eq!(results, exp); } + #[test] + fn dict_rle_values() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(east.clone(), 2); + drle.push_additional(north.clone(), 4); + drle.push_additional(west.clone(), 3); + + let results = drle.values(&[0, 1, 4, 5]); + + // w,w,w,e,e,n,n,n,n,w, w, w + // 0 1 2 3 4 5 6 7 8 9 10 11 + let exp = vec![&west, &west, &east, &north]; + assert_eq!(results, exp); + + let results = drle.values(&[10, 11]); + let exp = vec![&west, &west]; + assert_eq!(results, exp); + + let results = drle.values(&[0, 3, 5, 11]); + let exp = vec![&west, &east, &north, &west]; + assert_eq!(results, exp); + + let results = drle.values(&[0]); + let exp = vec![&west]; + assert_eq!(results, exp); + + let results = drle.values(&[0, 9]); + let exp = vec![&west, &west]; + assert_eq!(results, exp); + } + #[test] fn rle_dict_row_ids() { let mut drle = super::DictionaryRLE::new(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index aad3153c95..495bc8c62b 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -103,17 +103,32 @@ impl Segment { None } - pub fn row(&self, row_id: usize) -> Option>> { - if row_id >= self.num_rows() { - return None; + // Materialise all rows for each desired column. `rows` expects `row_ids` to + // be ordered in ascending order. + // + // `columns` determines which column values are returned. 
An empty `columns` + // value will result in rows for all columns being returned. + pub fn rows(&self, row_ids: &[usize], columns: &[String]) -> BTreeMap { + let mut rows: BTreeMap = BTreeMap::new(); + if row_ids.is_empty() { + // nothing to return + return rows; } - Some( - self.columns - .iter() - .map(|c| c.value(row_id)) - .collect::>>(), - ) + let cols_to_process = if columns.is_empty() { + &self.meta.column_names + } else { + columns + }; + + for col_name in cols_to_process { + let column = self.column(col_name.as_str()); + if let Some(column) = column { + rows.insert(col_name.clone(), column.rows(row_ids)); + }; + } + + rows } pub fn group_by_column_ids( @@ -136,20 +151,19 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option<&column::Scalar>)>, + predicates: &[(&str, Option<&column::Scalar>)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range } let (seg_min, seg_max) = self.meta.time_range; - if seg_min <= time_range.0 && seg_max >= time_range.1 { - // the segment completely overlaps the time range of query so don't - // need to intersect with time column. + if time_range.0 <= seg_min && time_range.1 > seg_max { + // the segment is completely overlapped by the time range of query, + // so don't need to intersect predicate results with time column. return self.filter_by_predicates_eq_no_time(predicates); } - - self.filter_by_predicates_eq_time(time_range, predicates) + self.filter_by_predicates_eq_time(time_range, predicates.to_vec()) } fn filter_by_predicates_eq_time( @@ -193,13 +207,22 @@ impl Segment { // meta row_ids bitmap. fn filter_by_predicates_eq_no_time( &self, - predicates: Vec<(&str, Option<&column::Scalar>)>, + predicates: &[(&str, Option<&column::Scalar>)], ) -> Option { + if predicates.is_empty() { + // In this case there are no predicates provided and we have no time + // range restrictions - we need to return a bitset for all row ids. + let mut bm = croaring::Bitmap::create_with_capacity(self.num_rows() as u32); + bm.add_range(0..self.num_rows() as u64); + return Some(bm); + } + let mut bm: Option = None; // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(col_pred_value) { + // TODO(edd): rework this clone + match c.row_ids_eq(*col_pred_value) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -222,12 +245,7 @@ impl Segment { return None; // column doesn't exist - no matching rows } } - - // In this case there are no predicates provided and we have no time - // range restrictions - we need to return a bitset for all row ids. - let mut bm = croaring::Bitmap::create_with_capacity(self.num_rows() as u32); - bm.add_range(0..self.num_rows() as u64); - Some(bm) + bm } } @@ -295,29 +313,44 @@ impl<'a> Segments<'a> { Self::new(segments) } - // pub fn filter_by_predicate_eq( - // &self, - // time_range: Option<(i64, i64)>, - // predicates: Vec<(&str, &column::Scalar)>, - // ) -> Option { - // let bm = None; - // for segment in self.segments { - // if let Some((min, max)) = time_range { - // if !segment.meta.overlaps_time_range(min, max) { - // continue; // segment doesn't have time range - // } - // } + // read_filter_eq returns rows of data for the desired columns. Results may + // be filtered by (currently) equality predicates and ranged by time. 
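The rows() materialisation above turns a set of matching row ids into per-column value vectors, which is the shape read_filter_eq then merges across segments. A reduced sketch with columns modelled as named f64 vectors (the patch's Vector enum generalises this across string/float/integer):

use std::collections::BTreeMap;

// Materialise the selected columns for an ascending list of row ids.
fn rows(
    columns: &BTreeMap<String, Vec<f64>>,
    row_ids: &[usize],
    select: &[String],
) -> BTreeMap<String, Vec<f64>> {
    let mut out = BTreeMap::new();
    if row_ids.is_empty() {
        return out; // nothing matched; nothing to materialise
    }
    for name in select {
        if let Some(col) = columns.get(name) {
            out.insert(name.clone(), row_ids.iter().map(|&id| col[id]).collect());
        }
    }
    out
}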
+ pub fn read_filter_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + select_columns: Vec, + ) -> BTreeMap { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } - // // build set of + let mut columns: BTreeMap = BTreeMap::new(); + for segment in &self.segments { + if !segment.meta.overlaps_time_range(min, max) { + continue; // segment doesn't have time range + } - // if let Some(col) = segment.column(column_name) { - // if col.maybe_contains(&value) { - // segments.push(segment); - // } - // } - // } - // Self::new(segments) - // } + if let Some(bm) = segment.filter_by_predicates_eq(time_range, predicates) { + let bm_vec = bm.to_vec(); + let row_ids = bm_vec.iter().map(|v| *v as usize).collect::>(); + + let rows = segment.rows(&row_ids, &select_columns); + for (k, v) in rows { + let segment_values = columns.get_mut(&k); + match segment_values { + Some(values) => values.extend(v), + None => { + columns.insert(k.to_owned(), v); + } + } + } + }; + } + + columns + } /// Returns the minimum value for a column in a set of segments. pub fn column_min(&self, column_name: &str) -> Option { From 2387b7c8498972e72380d729525d1f461dce094f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 11 Aug 2020 19:59:30 +0100 Subject: [PATCH 17/73] feat: add support for group by aggregate --- Cargo.lock | 1 + delorean_mem_qe/Cargo.toml | 1 + delorean_mem_qe/src/bin/main.rs | 93 +++++++++++++++------ delorean_mem_qe/src/column.rs | 76 +++++++++++++++++- delorean_mem_qe/src/segment.rs | 138 +++++++++++++++++++++++++++++++- 5 files changed, 281 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 328fb59d2e..effb3598b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -765,6 +765,7 @@ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "chrono", "croaring", "delorean_table", "snafu", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index aaf38f1b7a..32531e888b 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -11,6 +11,7 @@ delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } snafu = "0.6.8" croaring = "0.4.5" +chrono = "0.4" [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index f02b246ad3..8c7a89c1f0 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -5,7 +5,7 @@ use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; use delorean_mem_qe::column::{Column, Scalar}; -use delorean_mem_qe::segment::Segment; +use delorean_mem_qe::segment::{Aggregate, Segment}; use delorean_mem_qe::Store; // use snafu::ensure; @@ -36,6 +36,8 @@ fn main() { store.size(), ); + time_group_by_agg(&store); + // time_column_min_time(&store); // time_column_max_time(&store); // time_column_first(&store); @@ -86,31 +88,37 @@ fn main() { // time_row_by_preds(&store); - let segments = store.segments(); - let columns = segments.read_filter_eq( - (1590040770000000, 1590044410000000), - &[ - ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - ("method", Some(&column::Scalar::String("GET"))), - ( - "host", - Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - ), - ], - vec![ - "env".to_string(), - "method".to_string(), - "host".to_string(), - 
"counter".to_string(), - "time".to_string(), - ], - ); + // let segments = store.segments(); + // let columns = segments.read_filter_eq( + // (1590036110000000, 1590040770000000), + // &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], + // vec![ + // "env".to_string(), + // "method".to_string(), + // "host".to_string(), + // "counter".to_string(), + // "time".to_string(), + // ], + // ); - for (k, v) in columns { - println!("COLUMN {:?}", k); - println!("ROWS ({:?}) {:?}", v.len(), 0); - // println!("ROWS ({:?}) {:?}", v, v.len()); - } + // for (k, v) in columns { + // println!("COLUMN {:?}", k); + // // println!("ROWS ({:?}) {:?}", v.len(), 0); + // println!("ROWS ({}) {:?}", v, v.len()); + // } + + // let now = std::time::Instant::now(); + // let segments = store.segments(); + // let groups = segments.read_group_eq( + // (0, 1590044410000000), + // &[], + // vec!["env".to_string()], + // vec![ + // // ("counter".to_string(), Aggregate::Sum), + // ("counter".to_string(), Aggregate::Count), + // ], + // ); + // println!("{:?} {:?}", groups, now.elapsed()); // loop { // let mut total_count = 0.0; @@ -149,7 +157,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { + // let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { + // if i < 363 { + // i += 1; + // continue; + // } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -366,3 +379,33 @@ fn time_row_by_preds(store: &Store) { total_max ); } + +fn time_group_by_agg(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (0, 1590044410000000), + &[("method", Some(&column::Scalar::String("GET")))], + vec!["env".to_string()], + vec![ + ("counter".to_string(), Aggregate::Sum), + // ("counter".to_string(), Aggregate::Count), + ], + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "Ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index be0809cbb6..4e55ed8196 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -2,14 +2,68 @@ use std::convert::From; use super::encoding; -#[derive(Debug, PartialEq, PartialOrd)] +#[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Scalar<'a> { String(&'a str), Float(f64), Integer(i64), } -#[derive(Debug)] +impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { + type Output = Scalar<'a>; + + fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + match self { + Self::Float(v) => { + if let Self::Float(other) = _rhs { + return Self::Float(v + other); + } else { + panic!("invalid"); + }; + } + Self::Integer(v) => { + if let Self::Integer(other) = _rhs { + return Self::Integer(v + other); + } else { + panic!("invalid"); + }; + } + Self::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + +#[derive(Clone, Debug)] +pub enum Aggregate<'a> { + Count(u64), + Sum(Scalar<'a>), +} + +impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { + type Output = Aggregate<'a>; + + fn add(self, _rhs: &Aggregate<'a>) -> Self::Output { + match self { + Self::Count(c) => { + if let Self::Count(other) = _rhs { + return Self::Count(c + other); + } else { + panic!("invalid"); + }; + } + 
Self::Sum(s) => { + if let Self::Sum(other) = _rhs { + return Self::Sum(s + other); + } else { + panic!("invalid"); + }; + } + } + } +} + pub enum Vector<'a> { String(Vec<&'a Option>), Float(Vec<&'a f64>), @@ -55,6 +109,24 @@ impl<'a> Vector<'a> { } } +use chrono::prelude::*; + +impl<'a> std::fmt::Display for Vector<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(v) => write!(f, "{:?}", v), + Self::Float(v) => write!(f, "{:?}", v), + Self::Integer(v) => { + for x in v { + let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); + write!(f, "{}, ", ts)?; + } + Ok(()) + } + } + } +} + #[derive(Debug)] pub enum Column { String(String), diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 495bc8c62b..e578578b7e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -148,6 +148,18 @@ impl Segment { None } + // Returns the count aggregate for a given column name. + // + // Since we guarantee to provide row ids for the segment, and all columns + // have the same number of logical rows, the count is just the number of + // requested logical rows. + pub fn count_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { + if self.column(name).is_some() { + return Some(row_ids.cardinality() as u64); + } + None + } + pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), @@ -247,6 +259,72 @@ impl Segment { } bm } + + pub fn group_agg_by_predicate_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &Vec, + aggregates: &Vec<(String, Aggregate)>, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + let mut grouped_results = BTreeMap::new(); + + let filter_row_ids: croaring::Bitmap; + match self.filter_by_predicates_eq(time_range, predicates) { + Some(row_ids) => filter_row_ids = row_ids, + None => { + return grouped_results; + } + } + + if let Some(grouped_row_ids) = self.group_by_column_ids(&group_columns[0]) { + for (group_key_value, row_ids) in grouped_row_ids.iter() { + let mut filtered_row_ids = row_ids.and(&filter_row_ids); + if !filtered_row_ids.is_empty() { + // First calculate all of the aggregates for this grouped value + let mut aggs: Vec<((String, Aggregate), column::Aggregate)> = + Vec::with_capacity(aggregates.len()); + + for (col_name, agg) in aggregates { + match &agg { + Aggregate::Sum => { + aggs.push(( + (col_name.to_string(), agg.clone()), + column::Aggregate::Sum( + self.sum_column(col_name, &mut filtered_row_ids).unwrap(), + ), // assuming no non-null group keys + )); + } + Aggregate::Count => { + aggs.push(( + (col_name.to_string(), agg.clone()), + column::Aggregate::Count( + self.count_column(col_name, &mut filtered_row_ids).unwrap(), + ), // assuming no non-null group keys + )); + } + } + } + + // Next add these aggregates to the result set, keyed + // by the grouped value. + assert_eq!(aggs.len(), aggregates.len()); + grouped_results.insert(vec![group_key_value.clone().unwrap()], aggs); + } else { + // In this case there are grouped values in the column with no + // rows falling into time-range/predicate set. + println!( + "grouped value {:?} has no rows in time-range/predicate set", + group_key_value + ); + } + } + } else { + // segment doesn't have the column so can't group on it. + println!("don't have column - can't group"); + } + grouped_results + } } /// Meta data for a segment. 
This data is mainly used to determine if a segment @@ -282,6 +360,12 @@ impl SegmentMetaData { } } +#[derive(Debug, Clone)] +pub enum Aggregate { + Count, + Sum, +} + pub struct Segments<'a> { segments: Vec<&'a Segment>, } @@ -331,7 +415,6 @@ impl<'a> Segments<'a> { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range } - if let Some(bm) = segment.filter_by_predicates_eq(time_range, predicates) { let bm_vec = bm.to_vec(); let row_ids = bm_vec.iter().map(|v| *v as usize).collect::>(); @@ -352,6 +435,59 @@ impl<'a> Segments<'a> { columns } + // read_group_eq returns grouped aggregates of for the specified columns. + // Results may be filtered by (currently) equality predicates and ranged + // by time. + pub fn read_group_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, Aggregate)>, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + // TODO(edd): support multi column groups + assert_eq!(group_columns.len(), 1); + + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + BTreeMap::new(); + + for segment in &self.segments { + let segment_results = segment.group_agg_by_predicate_eq( + time_range, + predicates, + &group_columns, + &aggregates, + ); + + for (k, segment_aggs) in segment_results { + // assert_eq!(v.len(), aggregates.len()); + let cum_result = cum_results.get_mut(&k); + match cum_result { + Some(cum) => { + assert_eq!(cum.len(), segment_aggs.len()); + // In this case we need to aggregate the aggregates from + // each segment. + for i in 0..cum.len() { + // TODO(edd): this is more expensive than necessary + cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); + } + } + None => { + cum_results.insert(k, segment_aggs); + } + } + } + } + + // columns + cum_results + } + /// Returns the minimum value for a column in a set of segments. 
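// [Editor's sketch] `overlaps_time_range(min, max)` is used above to skip
// segments that cannot contain matching rows. A hedged sketch of such a
// predicate, assuming closed intervals on both ends; the real method's
// boundary semantics may differ.
fn overlaps(seg_min: i64, seg_max: i64, query_min: i64, query_max: i64) -> bool {
    // two intervals overlap iff each one starts before the other ends.
    seg_min <= query_max && query_min <= seg_max
}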
pub fn column_min(&self, column_name: &str) -> Option { if self.segments.is_empty() { From 0d5b6489804c8fc828ab8550c6b2e7aec3e3ca6e Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 12 Aug 2020 11:45:33 +0100 Subject: [PATCH 18/73] feat: add support for returning encoded values --- delorean_mem_qe/src/bin/main.rs | 12 ++--- delorean_mem_qe/src/column.rs | 72 ++++++++++++++++++++++----- delorean_mem_qe/src/encoding.rs | 88 +++++++++++++++++++++++++++++++-- delorean_mem_qe/src/segment.rs | 45 ++++++++++++++--- 4 files changed, 186 insertions(+), 31 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 8c7a89c1f0..6efffbf8b5 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -157,12 +157,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - // let mut i = 0; + let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - // if i < 363 { - // i += 1; - // continue; - // } + if i < 364 { + i += 1; + continue; + } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -391,7 +391,7 @@ fn time_group_by_agg(store: &Store) { let groups = segments.read_group_eq( (0, 1590044410000000), &[("method", Some(&column::Scalar::String("GET")))], - vec!["env".to_string()], + vec!["env".to_string(), "status".to_string()], vec![ ("counter".to_string(), Aggregate::Sum), // ("counter".to_string(), Aggregate::Count), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 4e55ed8196..8375708e91 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -66,8 +66,8 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { pub enum Vector<'a> { String(Vec<&'a Option>), - Float(Vec<&'a f64>), - Integer(Vec<&'a i64>), + Float(Vec), + Integer(Vec), } impl<'a> Vector<'a> { @@ -117,7 +117,7 @@ impl<'a> std::fmt::Display for Vector<'a> { Self::String(v) => write!(f, "{:?}", v), Self::Float(v) => write!(f, "{:?}", v), Self::Integer(v) => { - for x in v { + for x in v.iter() { let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); write!(f, "{}, ", ts)?; } @@ -153,6 +153,8 @@ impl Column { } } + /// Materialise all of the decoded values matching the provided logical + /// row ids. pub fn value(&self, row_id: usize) -> Option { match self { Column::String(c) => { @@ -180,18 +182,50 @@ impl Column { } } + /// Materialise all of the encoded values matching the provided logical + /// row ids. 
+ pub fn encoded_values(&self, row_ids: &croaring::Bitmap) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Integer(c.encoded_values(&row_id_vec)) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Float(c.encoded_values(&row_id_vec)) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Integer(c.encoded_values(&row_id_vec)) + } + } + } + /// materialise rows for each row_id - pub fn rows(&self, row_ids: &[usize]) -> Vector { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { + let row_ids_vec = row_ids.to_vec(); assert!( - row_ids.len() == 1 || row_ids[row_ids.len() - 1] > row_ids[0], + row_ids_vec.len() == 1 || row_ids_vec[row_ids_vec.len() - 1] > row_ids_vec[0], "got last row_id={:?} and first row_id={:?}", - row_ids[row_ids.len() - 1], - row_ids[0] + row_ids_vec[row_ids_vec.len() - 1], + row_ids_vec[0] ); match self { - Column::String(c) => Vector::String(c.values(row_ids)), - Column::Float(c) => Vector::Float(c.values(row_ids)), - Column::Integer(c) => Vector::Integer(c.values(row_ids)), + Column::String(c) => Vector::String(c.values(&row_ids_vec)), + Column::Float(c) => Vector::Float(c.values(&row_ids_vec)), + Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), } } @@ -555,10 +589,14 @@ impl String { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { self.data.values(row_ids) } + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.data.encoded_values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { self.data.scan_from(row_id) } @@ -597,10 +635,14 @@ impl Float { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec<&f64> { + pub fn values(&self, row_ids: &[u32]) -> Vec { self.data.values(row_ids) } + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.data.encoded_values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[f64] { self.data.scan_from(row_id) } @@ -654,10 +696,14 @@ impl Integer { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec<&i64> { + pub fn values(&self, row_ids: &[u32]) -> Vec { self.data.values(row_ids) } + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.data.encoded_values(row_ids) + } + pub fn scan_from(&self, row_id: usize) -> &[i64] { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index c2d0052f3e..d159a557f7 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -43,14 +43,21 @@ where self.values[row_id] } - pub fn values(&self, row_ids: &[usize]) -> Vec<&T> { + /// Return the decoded values for the provided logical row ids. + pub fn values(&self, row_ids: &[u32]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); for row_id in row_ids { - out.push(&self.values[*row_id]); + out.push(self.values[*row_id as usize]); } out } + /// Return the raw encoded values for the provided logical row ids. For Plain + /// encoding this is just the decoded values. 
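// [Editor's sketch] The arms above all follow the same shape: bail out early on
// an empty bitmap, then materialise the matching row ids once with `to_vec()`
// and hand the resulting &[u32] to the typed column. A tiny usage sketch of
// that roaring-bitmap round trip (croaring 0.4 API, as already used in this
// crate):
fn matching_row_ids() -> Vec<u32> {
    let mut bm = croaring::Bitmap::create();
    bm.add_range(10..14); // rows 10, 11, 12, 13 match some predicate
    if bm.is_empty() {
        return vec![];
    }
    bm.to_vec() // -> vec![10, 11, 12, 13]
}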
+ pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + self.values(row_ids) + } + // TODO(edd): fix this when added NULL support pub fn scan_from_until_some(&self, row_id: usize) -> Option { unreachable!("to remove"); @@ -400,9 +407,9 @@ impl DictionaryRLE { None } - // materialises a vector of references to logical values in the - // encoding for each provided row_id. - pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { + // materialises a vector of references to the decoded values in the + // each provided row_id. + pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { let mut out: Vec<&Option> = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -435,6 +442,42 @@ impl DictionaryRLE { out } + /// Return the raw encoded values for the provided logical row ids. + /// + /// TODO(edd): return type is wrong but I'm making it fit + /// + pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + let mut out: Vec = Vec::with_capacity(row_ids.len()); + + let mut curr_logical_row_id = 0; + + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_entry_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + for wanted_row_id in row_ids { + while curr_logical_row_id + curr_entry_rl <= *wanted_row_id as u64 { + // this encoded entry does not cover the row we need. + // move on to next entry + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_entry_id = res.0; + curr_entry_rl = res.1; + } + None => panic!("shouldn't get here"), + } + } + + // this entry covers the row_id we want. + out.push(curr_entry_id as i64); + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + assert_eq!(row_ids.len(), out.len()); + out + } + // values materialises a vector of references to all logical values in the // encoding. pub fn all_values(&mut self) -> Vec> { @@ -730,6 +773,41 @@ mod test { assert_eq!(results, exp); } + #[test] + fn dict_rle_encoded_values() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(east.clone(), 2); + drle.push_additional(north.clone(), 4); + drle.push_additional(west.clone(), 3); + + let results = drle.encoded_values(&[0, 1, 4, 5]); + + // w,w,w,e,e,n,n,n,n,w,w,w + // 0,0,0,1,1,2,2,2,2,0,0,0 + let exp = vec![0, 0, 1, 2]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[10, 11]); + let exp = vec![0, 0]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[0, 3, 5, 11]); + let exp = vec![0, 1, 2, 0]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[0]); + let exp = vec![0]; + assert_eq!(results, exp); + + let results = drle.encoded_values(&[0, 9]); + let exp = vec![0, 0]; + assert_eq!(results, exp); + } + #[test] fn rle_dict_row_ids() { let mut drle = super::DictionaryRLE::new(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index e578578b7e..6871dc9e20 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -103,12 +103,15 @@ impl Segment { None } - // Materialise all rows for each desired column. `rows` expects `row_ids` to - // be ordered in ascending order. + // Materialise all rows for each desired column. // // `columns` determines which column values are returned. An empty `columns` // value will result in rows for all columns being returned. 
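// [Editor's sketch] `encoded_values` above walks the dictionary's
// (entry_id, run_length) pairs. For context, here is a minimal encoder that
// produces that layout from a stream of already-dictionary-mapped ids;
// `rle_encode` is an illustrative stand-in for the bookkeeping that
// `push_additional` performs.
fn rle_encode(ids: &[usize]) -> Vec<(usize, u64)> {
    let mut runs: Vec<(usize, u64)> = Vec::new();
    for &id in ids {
        match runs.last_mut() {
            // same id as the current run: just lengthen it.
            Some((last, run)) if *last == id => *run += 1,
            // id changed (or first value): start a new run.
            _ => runs.push((id, 1)),
        }
    }
    runs // e.g. [0, 0, 0, 1, 1] -> [(0, 3), (1, 2)]
}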
- pub fn rows(&self, row_ids: &[usize], columns: &[String]) -> BTreeMap { + pub fn rows( + &self, + row_ids: &croaring::Bitmap, + columns: &[String], + ) -> BTreeMap { let mut rows: BTreeMap = BTreeMap::new(); if row_ids.is_empty() { // nothing to return @@ -141,6 +144,37 @@ impl Segment { None } + pub fn aggregate_by_groups( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: &Vec<(String, Aggregate)>, + ) -> BTreeMap, Vec<(String, Aggregate)>> { + // Build a hash table - essentially, scan columns for matching row ids, + // emitting the encoded value for each column and track those value + // combinations in a hashmap with running aggregates. + + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + + // materialise all encoded values for the matching rows. + // let mut column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + // if let Some(column) = self.column(&group_column) { + // column_encoded_values.push(Some(column.encoded_values(&filtered_row_ids)); + // } else { + // column_encoded_values.push(None); + // } + } + BTreeMap::new() + } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -416,10 +450,7 @@ impl<'a> Segments<'a> { continue; // segment doesn't have time range } if let Some(bm) = segment.filter_by_predicates_eq(time_range, predicates) { - let bm_vec = bm.to_vec(); - let row_ids = bm_vec.iter().map(|v| *v as usize).collect::>(); - - let rows = segment.rows(&row_ids, &select_columns); + let rows = segment.rows(&bm, &select_columns); for (k, v) in rows { let segment_values = columns.get_mut(&k); match segment_values { From 3df79a675d15ab08fcfc00dee8155b2ad17b0d43 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 10:07:34 +0100 Subject: [PATCH 19/73] feat: multi-group hash --- delorean_mem_qe/src/bin/main.rs | 38 +++--- delorean_mem_qe/src/column.rs | 121 +++++++++++++++++ delorean_mem_qe/src/segment.rs | 231 ++++++++++++++++++++++++++------ 3 files changed, 331 insertions(+), 59 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 6efffbf8b5..e6bd236c26 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -36,7 +36,7 @@ fn main() { store.size(), ); - time_group_by_agg(&store); + // time_group_by_agg(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -107,18 +107,20 @@ fn main() { // println!("ROWS ({}) {:?}", v, v.len()); // } - // let now = std::time::Instant::now(); - // let segments = store.segments(); - // let groups = segments.read_group_eq( - // (0, 1590044410000000), - // &[], - // vec!["env".to_string()], - // vec![ - // // ("counter".to_string(), Aggregate::Sum), - // ("counter".to_string(), Aggregate::Count), - // ], - // ); - // println!("{:?} {:?}", groups, now.elapsed()); + loop { + let now = std::time::Instant::now(); + let segments = store.segments(); + let groups = segments.read_group_eq( + (0, 1590044410000000), + &[], + vec!["env".to_string(), "status".to_string()], + vec![ + ("counter".to_string(), Aggregate::Sum), + // ("counter".to_string(), Aggregate::Count), + ], + ); + println!("{:?} {:?}", groups, now.elapsed()); + } // loop { // let mut 
total_count = 0.0; @@ -157,12 +159,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - let mut i = 0; + // let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - if i < 364 { - i += 1; - continue; - } + // if i < 364 { + // i += 1; + // continue; + // } let segment = convert_record_batch(rb)?; store.add_segment(segment); } diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 8375708e91..ec62fd8104 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -35,6 +35,30 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { } } +impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { + fn add_assign(&mut self, _rhs: &Scalar<'a>) { + match self { + Self::Float(v) => { + if let Self::Float(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Self::Integer(v) => { + if let Self::Integer(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Self::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + #[derive(Clone, Debug)] pub enum Aggregate<'a> { Count(u64), @@ -64,6 +88,35 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { } } +// impl<'a> std::ops::Add<&Scalar<'a>> for Aggregate<'a> { +// type Output = Aggregate<'a>; + +// fn add(self, _rhs: &Scalar<'a>) -> Self::Output { +// match _rhs { +// Scalar::String(v) => {} +// Scalar::Float(v) => {} +// Scalar::Integer(v) => {} +// } +// // match self { +// // Self::Count(c) => { +// // match +// // if let Scalar::Count(other) = _rhs { +// // return Self::Count(c + other); +// // } else { +// // panic!("invalid"); +// // }; +// // } +// // Self::Sum(s) => { +// // if let Self::Sum(other) = _rhs { +// // return Self::Sum(s + other); +// // } else { +// // panic!("invalid"); +// // }; +// // } +// // } +// } +// } + pub enum Vector<'a> { String(Vec<&'a Option>), Float(Vec), @@ -82,6 +135,16 @@ impl<'a> Vector<'a> { Self::Integer(v) => v.len(), } } + + pub fn get(&self, i: usize) -> Scalar<'a> { + match self { + // FIXME(edd): SORT THIS OPTION OUT + Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), + Self::Float(v) => Scalar::Float(v[i]), + Self::Integer(v) => Scalar::Integer(v[i]), + } + } + pub fn extend(&mut self, other: Self) { match self { Self::String(v) => { @@ -109,6 +172,33 @@ impl<'a> Vector<'a> { } } +/// VectorIterator allows a `Vector` to be iterated. Until vectors are drained +/// Scalar values are emitted. +pub struct VectorIterator<'a> { + v: Vector<'a>, + next_i: usize, +} + +impl<'a> VectorIterator<'a> { + pub fn new(v: Vector<'a>) -> Self { + Self { v, next_i: 0 } + } +} +impl<'a> Iterator for VectorIterator<'a> { + type Item = Scalar<'a>; + + fn next(&mut self) -> Option { + let curr_i = self.next_i; + self.next_i += 1; + + if curr_i == self.v.len() { + return None; + } + + Some(self.v.get(curr_i)) + } +} + use chrono::prelude::*; impl<'a> std::fmt::Display for Vector<'a> { @@ -182,6 +272,37 @@ impl Column { } } + /// Materialise all of the decoded values matching the provided logical + /// row ids. 
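// [Editor's sketch] The benchmarks in main.rs above repeat a query in a loop
// and report total and per-call elapsed time. The same harness, factored out;
// `time_calls` is an illustrative helper, not part of the crate.
fn time_calls<F: FnMut()>(repeat: u32, mut f: F) -> std::time::Duration {
    let now = std::time::Instant::now();
    for _ in 0..repeat {
        f(); // the query under test; results are usually accumulated so the
             // optimiser cannot discard the work.
    }
    now.elapsed() / repeat // mean duration per call, as printed above
}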
+ pub fn values(&self, row_ids: &croaring::Bitmap) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::String(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::String(c.values(&row_id_vec)) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Float(c.values(&row_id_vec)) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let row_id_vec = row_ids.to_vec(); + Vector::Integer(c.values(&row_id_vec)) + } + } + } + /// Materialise all of the encoded values matching the provided logical /// row ids. pub fn encoded_values(&self, row_ids: &croaring::Bitmap) -> Vector { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 6871dc9e20..f4a9d2c1fa 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use super::column; use super::column::Column; @@ -148,9 +148,10 @@ impl Segment { &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, - aggregates: &Vec<(String, Aggregate)>, - ) -> BTreeMap, Vec<(String, Aggregate)>> { + group_columns: &[String], + aggregates: &[(String, Aggregate)], + ) -> BTreeMap, Vec<(String, Option)>> { + // println!("working segment {:?}", time_range); // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -162,16 +163,163 @@ impl Segment { } else { return BTreeMap::new(); } + let total_rows = &filtered_row_ids.cardinality(); + // println!("TOTAL FILTERED ROWS {:?}", total_rows); - // materialise all encoded values for the matching rows. - // let mut column_encoded_values = Vec::with_capacity(group_columns.len()); + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { - // if let Some(column) = self.column(&group_column) { - // column_encoded_values.push(Some(column.encoded_values(&filtered_row_ids)); - // } else { - // column_encoded_values.push(None); - // } + if let Some(column) = self.column(&group_column) { + let encoded_values: Vec; + if let column::Vector::Integer(vector) = column.encoded_values(&filtered_row_ids) { + encoded_values = vector; + } else { + unimplemented!("currently you can only group on encoded string columns"); + } + + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + group_column_encoded_values.push(Some(encoded_values)); + } else { + group_column_encoded_values.push(None); + } } + // println!("grouped columns {:?}", group_column_encoded_values); + + // TODO(edd): we could do this with an iterator I expect. + // + // materialise all decoded values for the rows in the columns we are + // aggregating on. 
+ let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); + for (column_name, _) in aggregates { + if let Some(column) = self.column(&column_name) { + let decoded_values = column.values(&filtered_row_ids); + assert_eq!( + filtered_row_ids.cardinality() as usize, + decoded_values.len() + ); + aggregate_column_decoded_values.push((column_name, Some(decoded_values))); + } else { + aggregate_column_decoded_values.push((column_name, None)); + } + } + + // now we have all the matching rows for each grouping column and each aggregation + // column. Materialised values for grouping are in encoded form. + // + // Next we iterate all rows in all columns and create a hash entry with + // running aggregates. + + // First we will build a collection of iterators over the columns we + // are grouping on. For columns that have no matching rows from the + // filtering stage we will just emit None. + let mut group_itrs = group_column_encoded_values + .iter() + .map(|x| match x { + Some(values) => Some(values.iter()), + None => None, + }) + .collect::>(); + + // Next we will build a collection of iterators over the columns we + // are aggregating on. For columns that have no matching rows from the + // filtering stage we will just emit None. + let mut aggregate_itrs = aggregate_column_decoded_values + .into_iter() + .map(|(col_name, values)| match values { + Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), + None => (col_name.as_str(), None), + }) + .collect::>(); + + let mut hash_table: HashMap< + Vec>, + Vec<(&String, &Aggregate, Option)>, + > = HashMap::with_capacity(30000); + + let mut aggregate_row: Vec<(&str, Option)> = + std::iter::repeat_with(|| ("", None)) + .take(aggregate_itrs.len()) + .collect(); + + let mut processed_rows = 0; + while processed_rows < *total_rows { + let group_row: Vec> = group_itrs + .iter_mut() + .map(|x| match x { + Some(itr) => itr.next(), + None => None, + }) + .collect(); + + // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs + // .iter_mut() + // .map(|&mut (col_name, ref mut itr)| match itr { + // Some(itr) => (col_name, itr.next()), + // None => (col_name, None), + // }) + // .collect(); + + // re-use aggregate_row vector. + for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { + match itr { + Some(itr) => aggregate_row[i] = (col_name, itr.next()), + None => aggregate_row[i] = (col_name, None), + } + } + + // Lookup the group key in the hash map - if it's empty then insert + // a place-holder for each aggregate being executed. + let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { + // TODO COULD BE MAP/COLLECT + let mut agg_results: Vec<(&String, &Aggregate, Option)> = + Vec::with_capacity(aggregates.len()); + for (col_name, agg_type) in aggregates { + agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option + } + agg_results + }); + + // Update aggregates - we process each row value and for each one + // check which aggregates apply to it. + // + // TODO(edd): this is probably a bit of a perf suck. + for (col_name, row_value) in &aggregate_row { + for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in + group_key_entry.iter_mut() + { + if col_name != cum_col_name { + continue; + } + + // TODO(edd): remove unwrap - it should work because we are + // tracking iteration count in loop. 
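// [Editor's sketch] The essence of the hash-based grouping above, stripped of
// the column plumbing: build a key per row from the encoded group columns and
// keep running aggregates in a HashMap. `hash_group` and the (count, sum)
// tuple are illustrative simplifications of the crate's aggregate vectors.
use std::collections::HashMap;

fn hash_group(group_cols: &[Vec<i64>], values: &[f64]) -> HashMap<Vec<i64>, (u64, f64)> {
    let mut table: HashMap<Vec<i64>, (u64, f64)> = HashMap::new();
    for row in 0..values.len() {
        // the group key is this row's encoded value in each grouping column.
        let key: Vec<i64> = group_cols.iter().map(|col| col[row]).collect();
        let entry = table.entry(key).or_insert((0, 0.0));
        entry.0 += 1;           // running count
        entry.1 += values[row]; // running sum
    }
    table
}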
+ let row_value = row_value.as_ref().unwrap(); + + match cum_agg_value { + Some(agg) => match agg { + column::Aggregate::Count(cum_count) => { + *cum_count += 1; + } + column::Aggregate::Sum(cum_sum) => { + *cum_sum += row_value; + } + }, + None => { + *cum_agg_value = match agg_type { + Aggregate::Count => Some(column::Aggregate::Count(0)), + Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), + } + } + } + } + } + processed_rows += 1; + } + // println!("{:?}", hash_table.len()); BTreeMap::new() } @@ -476,46 +624,47 @@ impl<'a> Segments<'a> { group_columns: Vec, aggregates: Vec<(String, Aggregate)>, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { - // TODO(edd): support multi column groups - assert_eq!(group_columns.len(), 1); - let (min, max) = time_range; if max <= min { panic!("max <= min"); } + for segment in &self.segments { + segment.aggregate_by_groups(time_range, predicates, &group_columns, &aggregates); + } + let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = BTreeMap::new(); - for segment in &self.segments { - let segment_results = segment.group_agg_by_predicate_eq( - time_range, - predicates, - &group_columns, - &aggregates, - ); + // for segment in &self.segments { + // let segment_results = segment.group_agg_by_predicate_eq( + // time_range, + // predicates, + // &group_columns, + // &aggregates, + // ); - for (k, segment_aggs) in segment_results { - // assert_eq!(v.len(), aggregates.len()); - let cum_result = cum_results.get_mut(&k); - match cum_result { - Some(cum) => { - assert_eq!(cum.len(), segment_aggs.len()); - // In this case we need to aggregate the aggregates from - // each segment. - for i in 0..cum.len() { - // TODO(edd): this is more expensive than necessary - cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); - } - } - None => { - cum_results.insert(k, segment_aggs); - } - } - } - } + // for (k, segment_aggs) in segment_results { + // // assert_eq!(v.len(), aggregates.len()); + // let cum_result = cum_results.get_mut(&k); + // match cum_result { + // Some(cum) => { + // assert_eq!(cum.len(), segment_aggs.len()); + // // In this case we need to aggregate the aggregates from + // // each segment. + // for i in 0..cum.len() { + // // TODO(edd): this is more expensive than necessary + // cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); + // } + // } + // None => { + // cum_results.insert(k, segment_aggs); + // } + // } + // } + // } - // columns + // // columns cum_results } From b994831163dfea083c079e92041809b8cfbc4aab Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 10:47:35 +0100 Subject: [PATCH 20/73] refactor: decoded encoded ids --- delorean_mem_qe/src/column.rs | 27 +++++++++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 7 +++++++ 2 files changed, 34 insertions(+) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index ec62fd8104..c4bca59542 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -334,6 +334,26 @@ impl Column { } } + /// Given an encoded value for a row, materialise and return the decoded + /// version. + /// + /// This currently just supports decoding integer scalars back into dictionary + /// strings. + pub fn decode_value(&self, encoded_id: i64) -> std::string::String { + match self { + Column::String(c) => { + // FIX THIS UNWRAP AND HOPE THERE ARE NO NULL VALUES! 
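// [Editor's sketch] The `decode_value`/`decode_id` plumbing patch 20 adds here
// translates an encoded group key back into its dictionary strings. A hedged
// sketch of that inverse lookup over the `index -> entry` map, ignoring NULL
// entries just as the patch itself does for now; `decode_group_key` is an
// illustrative name, not the crate's API.
use std::collections::BTreeMap;

fn decode_group_key(index_entry: &BTreeMap<usize, String>, encoded_key: &[i64]) -> Vec<String> {
    encoded_key
        .iter()
        .map(|id| index_entry[&(*id as usize)].clone()) // panics on unknown ids
        .collect()
}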
+ c.decode_id(encoded_id).unwrap() + } + Column::Float(c) => { + unreachable!("this isn't supported right now"); + } + Column::Integer(c) => { + unreachable!("this isn't supported right now"); + } + } + } + /// materialise rows for each row_id pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { let row_ids_vec = row_ids.to_vec(); @@ -718,6 +738,13 @@ impl String { self.data.encoded_values(row_ids) } + /// Return the decoded value for an encoded ID. + /// + /// Panics if there is no decoded value for the provided id + pub fn decode_id(&self, encoded_id: i64) -> Option { + self.data.decode_id(encoded_id as usize) + } + pub fn scan_from(&self, row_id: usize) -> Vec<&Option> { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index d159a557f7..7285c1268a 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -442,6 +442,13 @@ impl DictionaryRLE { out } + /// Return the decoded value for an encoded ID. + /// + /// Panics if there is no decoded value for the provided id + pub fn decode_id(&self, encoded_id: usize) -> Option { + self.index_entry.get(&encoded_id).unwrap().clone() + } + /// Return the raw encoded values for the provided logical row ids. /// /// TODO(edd): return type is wrong but I'm making it fit From c1cbbf18f8244750a0164b8a07607d609ff8d10f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 11:38:27 +0100 Subject: [PATCH 21/73] fix: column sorting pre-check --- delorean_table/src/sorter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index 725a12f30e..fb0add52e7 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -67,7 +67,7 @@ pub fn sort(packers: &mut [Packers], sort_by: &[usize]) -> Result<(), Error> { if n > SORTED_CHECK_SIZE { let mut sorted = true; for i in 1..n { - if cmp(packers, 0, i, sort_by) != Ordering::Equal { + if cmp(packers, i - 1, i, sort_by) == Ordering::Greater { sorted = false; break; } From d70d5dde9de94f9ea51eedeb7eea486b00d3b1b4 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 15:54:11 +0100 Subject: [PATCH 22/73] feat: support pre-populating dictionary --- delorean_mem_qe/src/encoding.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 7285c1268a..cc231e087c 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -267,6 +267,31 @@ impl DictionaryRLE { } } + pub fn with_dictionary(dictionary: BTreeSet>) -> Self { + let mut _self = Self { + entry_index: BTreeMap::new(), + entry_row_ids: BTreeMap::new(), + index_entry: BTreeMap::new(), + map_size: 0, + run_lengths: Vec::new(), + run_length_size: 0, + total: 0, + }; + + for (next_idx, k) in dictionary.iter().enumerate() { + _self.entry_index.insert(k.to_owned(), next_idx); + _self.index_entry.insert(next_idx, k.to_owned()); + + _self + .entry_row_ids + .insert(k.to_owned(), croaring::Bitmap::create()); + + _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta + } + + _self + } + pub fn push(&mut self, v: &str) { self.push_additional(Some(v.to_owned()), 1); } @@ -380,11 +405,8 @@ impl DictionaryRLE { // unreachable!("for now"); // } - pub fn dictionary(&self) -> BTreeSet> { - self.entry_index - .keys() - .cloned() - .collect::>>() + pub fn dictionary(&self) -> BTreeMap, usize> { + self.entry_index.clone() } // 
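// [Editor's sketch] Patch 21 above fixes the "already sorted?" pre-check in the
// sorter: the old code compared every row against row 0 and required equality,
// so any column with more than one distinct value was declared unsorted; the
// fix compares neighbouring rows and only a descending pair disqualifies. The
// same predicate on a plain slice:
fn is_sorted_asc<T: Ord>(rows: &[T]) -> bool {
    // adjacent pairs may compare Less or Equal; only Greater breaks sortedness.
    (1..rows.len()).all(|i| rows[i - 1] <= rows[i])
}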
get the logical value at the provided index, or None if there is no value From 231f429a56402629bb2ab34af4a2cc97bed1da2d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 13 Aug 2020 21:08:41 +0100 Subject: [PATCH 23/73] feat: sort group by measurement --- Cargo.lock | 1 + delorean_mem_qe/Cargo.toml | 5 + delorean_mem_qe/src/bin/main.rs | 35 +++- delorean_mem_qe/src/column.rs | 145 +++++++++++++--- delorean_mem_qe/src/encoding.rs | 57 +++--- delorean_mem_qe/src/segment.rs | 295 ++++++++++++++++++++++++++++++-- delorean_table/src/sorter.rs | 44 ++++- 7 files changed, 513 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index effb3598b4..4ccedecd78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -767,6 +767,7 @@ dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "chrono", "croaring", + "crossbeam", "delorean_table", "snafu", ] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 32531e888b..7f8ac255a3 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -6,11 +6,16 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[profile.release] +debug = true + [dependencies] delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } snafu = "0.6.8" croaring = "0.4.5" +crossbeam = "0.7.3" chrono = "0.4" [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e6bd236c26..aa3bbe4654 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -111,9 +111,9 @@ fn main() { let now = std::time::Instant::now(); let segments = store.segments(); let groups = segments.read_group_eq( - (0, 1590044410000000), + (0, 1890040790000000), &[], - vec!["env".to_string(), "status".to_string()], + vec!["env".to_string(), "role".to_string()], vec![ ("counter".to_string(), Aggregate::Sum), // ("counter".to_string(), Aggregate::Count), @@ -159,12 +159,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - // let mut i = 0; + let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - // if i < 364 { - // i += 1; - // continue; - // } + if i < 363 { + i += 1; + continue; + } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -204,7 +204,26 @@ fn convert_record_batch(rb: RecordBatch) -> Result { .downcast_ref::() .unwrap(); - let mut c = column::String::default(); + // IMPORTANT - build a set of values (dictionary) ahead of + // time so we can ensure we encoded the column in an ordinally + // correct manner. + // + // We can use a trick where encoded integers are ordered according + // to the decoded values, making sorting, comparison and grouping + // more efficient. 
+ // + let mut dictionary: std::collections::BTreeSet> = + std::collections::BTreeSet::new(); + for j in 1..arr.len() { + let next = if column.is_null(j) { + None + } else { + Some(arr.value(j).to_string()) + }; + dictionary.insert(next); + } + + let mut c = column::String::with_dictionary(dictionary); let mut prev = if !column.is_null(0) { Some(arr.value(0)) } else { diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index c4bca59542..08d95165b5 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -274,14 +274,53 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids. - pub fn values(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn values(&self, row_ids: &[usize]) -> Vector { match self { Column::String(c) => { if row_ids.is_empty() { return Vector::String(vec![]); } - let row_id_vec = row_ids.to_vec(); + Vector::String(c.values(row_ids)) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + let now = std::time::Instant::now(); + let v = c.values(row_ids); + println!("time getting decoded values for float {:?}", now.elapsed()); + + Vector::Float(v) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let now = std::time::Instant::now(); + let v = c.values(row_ids); + println!("time getting decoded values for int {:?}", now.elapsed()); + Vector::Integer(v) + } + } + } + + /// Materialise all of the decoded values matching the provided logical + /// row ids within the bitmap + pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::String(vec![]); + } + + let row_id_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); Vector::String(c.values(&row_id_vec)) } Column::Float(c) => { @@ -289,7 +328,11 @@ impl Column { return Vector::Float(vec![]); } - let row_id_vec = row_ids.to_vec(); + let row_id_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); Vector::Float(c.values(&row_id_vec)) } Column::Integer(c) => { @@ -297,7 +340,11 @@ impl Column { return Vector::Integer(vec![]); } - let row_id_vec = row_ids.to_vec(); + let row_id_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); Vector::Integer(c.values(&row_id_vec)) } } @@ -305,31 +352,70 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. 
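// [Editor's sketch] The ordinal-encoding trick described above: pre-building
// the dictionary from a BTreeSet assigns ids in lexicographic order of the
// decoded values, so comparing, sorting, and grouping on the integer ids agrees
// with the string order. (Note the population loop above starts at index 1, so
// row 0's value only joins the pre-built dictionary if it re-appears later.)
// A minimal stand-alone sketch; `ordinal_dictionary` is an illustrative name.
use std::collections::{BTreeMap, BTreeSet};

fn ordinal_dictionary(values: &[&str]) -> BTreeMap<String, usize> {
    let distinct: BTreeSet<&str> = values.iter().copied().collect();
    distinct
        .into_iter()
        .enumerate() // BTreeSet iterates in sorted order, so ids are ordinal
        .map(|(id, v)| (v.to_string(), id))
        .collect()
}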
- pub fn encoded_values(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + let now = std::time::Instant::now(); + let row_ids_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + println!("time unpacking bitmap {:?}", now.elapsed()); + match self { Column::String(c) => { if row_ids.is_empty() { return Vector::Integer(vec![]); } - let row_id_vec = row_ids.to_vec(); - Vector::Integer(c.encoded_values(&row_id_vec)) + let now = std::time::Instant::now(); + let v = c.encoded_values(&row_ids_vec); + println!("time getting encoded values {:?}", now.elapsed()); + Vector::Integer(v) } Column::Float(c) => { if row_ids.is_empty() { return Vector::Float(vec![]); } - let row_id_vec = row_ids.to_vec(); - Vector::Float(c.encoded_values(&row_id_vec)) + Vector::Float(c.encoded_values(&row_ids_vec)) } Column::Integer(c) => { if row_ids.is_empty() { return Vector::Integer(vec![]); } - let row_id_vec = row_ids.to_vec(); - Vector::Integer(c.encoded_values(&row_id_vec)) + Vector::Integer(c.encoded_values(&row_ids_vec)) + } + } + } + + /// Materialise all of the encoded values matching the provided logical + /// row ids. + pub fn encoded_values(&self, row_ids: &[usize]) -> Vector { + match self { + Column::String(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + let now = std::time::Instant::now(); + let v = c.encoded_values(&row_ids); + println!("time getting encoded values {:?}", now.elapsed()); + Vector::Integer(v) + } + Column::Float(c) => { + if row_ids.is_empty() { + return Vector::Float(vec![]); + } + + Vector::Float(c.encoded_values(&row_ids)) + } + Column::Integer(c) => { + if row_ids.is_empty() { + return Vector::Integer(vec![]); + } + + Vector::Integer(c.encoded_values(&row_ids)) } } } @@ -356,7 +442,14 @@ impl Column { /// materialise rows for each row_id pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { - let row_ids_vec = row_ids.to_vec(); + let now = std::time::Instant::now(); + let row_ids_vec = row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + println!("time unpacking bitmap {:?}", now.elapsed()); + assert!( row_ids_vec.len() == 1 || row_ids_vec[row_ids_vec.len() - 1] > row_ids_vec[0], "got last row_id={:?} and first row_id={:?}", @@ -555,9 +648,7 @@ impl Column { } } - pub fn group_by_ids( - &self, - ) -> &std::collections::BTreeMap, croaring::Bitmap> { + pub fn group_by_ids(&self) -> &std::collections::BTreeMap { match self { Column::String(c) => c.data.group_row_ids(), Column::Float(_) => unimplemented!("not implemented"), @@ -708,6 +799,14 @@ pub struct String { } impl String { + pub fn with_dictionary( + dictionary: std::collections::BTreeSet>, + ) -> Self { + let mut c = Self::default(); + c.data = encoding::DictionaryRLE::with_dictionary(dictionary); + c + } + pub fn add(&mut self, s: &str) { self.meta.add(Some(s.to_string())); self.data.push(s); @@ -730,11 +829,11 @@ impl String { self.data.value(row_id) } - pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } @@ -755,9 +854,7 @@ impl String { } // TODO(edd) shouldn't let roaring stuff leak out... 
- pub fn group_row_ids( - &self, - ) -> &std::collections::BTreeMap, croaring::Bitmap> { + pub fn group_row_ids(&self) -> &std::collections::BTreeMap { self.data.group_row_ids() } } @@ -783,11 +880,11 @@ impl Float { self.data.value(row_id) } - pub fn values(&self, row_ids: &[u32]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } @@ -844,11 +941,11 @@ impl Integer { self.data.value(row_id) } - pub fn values(&self, row_ids: &[u32]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index cc231e087c..de7090115c 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -44,17 +44,26 @@ where } /// Return the decoded values for the provided logical row ids. - pub fn values(&self, row_ids: &[u32]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); - for row_id in row_ids { - out.push(self.values[*row_id as usize]); + for chunks in row_ids.chunks_exact(4) { + out.push(self.values[chunks[3]]); + out.push(self.values[chunks[2]]); + out.push(self.values[chunks[1]]); + out.push(self.values[chunks[0]]); + // out.push(self.values[row_id]); + } + + let rem = row_ids.len() % 4; + for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { + out.push(self.values[i]); } out } /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.values(row_ids) } @@ -238,12 +247,12 @@ pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. entry_index: BTreeMap, usize>, - // Experiment - store rows that each entry has a value for - entry_row_ids: BTreeMap, croaring::Bitmap>, - // stores the mapping between an index and its entry. 
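// [Editor's note] In the unrolled `values` hunk above, the four pushes index
// chunks[3] down to chunks[0], which appears to emit every block of four values
// in reverse row order; a straight-line gather keeps row order. A hedged sketch
// with i64 standing in for the generic T:
fn gather(values: &[i64], row_ids: &[usize]) -> Vec<i64> {
    let mut out = Vec::with_capacity(row_ids.len());
    for chunk in row_ids.chunks_exact(4) {
        // manual 4-wide unroll, preserving ascending order
        out.push(values[chunk[0]]);
        out.push(values[chunk[1]]);
        out.push(values[chunk[2]]);
        out.push(values[chunk[3]]);
    }
    let rem = row_ids.len() % 4;
    for &i in &row_ids[row_ids.len() - rem..] {
        out.push(values[i]);
    }
    out
}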
index_entry: BTreeMap>, + // Experiment - store rows that each entry has a value for + index_row_ids: BTreeMap, + map_size: usize, // TODO(edd) this isn't perfect at all // stores tuples where each pair refers to a dictionary entry and the number @@ -258,7 +267,7 @@ impl DictionaryRLE { pub fn new() -> Self { Self { entry_index: BTreeMap::new(), - entry_row_ids: BTreeMap::new(), + index_row_ids: BTreeMap::new(), index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), @@ -270,7 +279,7 @@ impl DictionaryRLE { pub fn with_dictionary(dictionary: BTreeSet>) -> Self { let mut _self = Self { entry_index: BTreeMap::new(), - entry_row_ids: BTreeMap::new(), + index_row_ids: BTreeMap::new(), index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), @@ -283,8 +292,8 @@ impl DictionaryRLE { _self.index_entry.insert(next_idx, k.to_owned()); _self - .entry_row_ids - .insert(k.to_owned(), croaring::Bitmap::create()); + .index_row_ids + .insert(next_idx as u32, croaring::Bitmap::create()); _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta } @@ -313,8 +322,8 @@ impl DictionaryRLE { self.run_lengths.push((*idx, additional)); self.run_length_size += std::mem::size_of::<(usize, u64)>(); } - self.entry_row_ids - .get_mut(&v) + self.index_row_ids + .get_mut(&(*idx as u32)) .unwrap() .add_range(self.total..self.total + additional); } @@ -325,8 +334,8 @@ impl DictionaryRLE { let idx = self.entry_index.len(); self.entry_index.insert(v.clone(), idx); - self.entry_row_ids - .insert(v.clone(), croaring::Bitmap::create()); + self.index_row_ids + .insert(idx as u32, croaring::Bitmap::create()); if let Some(value) = &v { self.map_size += value.len(); } @@ -334,8 +343,8 @@ impl DictionaryRLE { self.map_size += 8 + std::mem::size_of::(); // TODO(edd): clean this option size up self.run_lengths.push((idx, additional)); - self.entry_row_ids - .get_mut(&v) + self.index_row_ids + .get_mut(&(idx as u32)) .unwrap() .add_range(self.total..self.total + additional); self.run_length_size += std::mem::size_of::<(usize, u64)>(); @@ -380,8 +389,8 @@ impl DictionaryRLE { } // get the set of row ids for each distinct value - pub fn group_row_ids(&self) -> &BTreeMap, croaring::Bitmap> { - &self.entry_row_ids + pub fn group_row_ids(&self) -> &BTreeMap { + &self.index_row_ids } // row_ids returns an iterator over the set of row ids matching the provided @@ -431,7 +440,7 @@ impl DictionaryRLE { // materialises a vector of references to the decoded values in the // each provided row_id. 
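// [Editor's sketch] The refactor above keys the per-value row-id bitmaps by
// dictionary id (`index_row_ids`) instead of by the value itself. The
// maintenance step in isolation: whenever a run of `additional` rows is
// appended for an entry, that entry's bitmap gains the row range, which later
// makes `group_row_ids` a ready-made grouping index. `record_run` is an
// illustrative stand-in for the bookkeeping inside `push_additional`.
use std::collections::BTreeMap;

fn record_run(
    index_row_ids: &mut BTreeMap<u32, croaring::Bitmap>,
    entry_id: u32,
    start_row: u64,
    additional: u64,
) {
    index_row_ids
        .entry(entry_id)
        .or_insert_with(croaring::Bitmap::create)
        .add_range(start_row..start_row + additional);
}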
- pub fn values(&self, row_ids: &[u32]) -> Vec<&Option> { + pub fn values(&self, row_ids: &[usize]) -> Vec<&Option> { let mut out: Vec<&Option> = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -475,7 +484,7 @@ impl DictionaryRLE { /// /// TODO(edd): return type is wrong but I'm making it fit /// - pub fn encoded_values(&self, row_ids: &[u32]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { let mut out: Vec = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -706,21 +715,21 @@ mod test { assert_eq!(drle.value(8).unwrap(), "zoo"); let row_ids = drle - .entry_row_ids + .index_row_ids .get(&Some("hello".to_string())) .unwrap() .to_vec(); assert_eq!(row_ids, vec![0, 1, 3, 4, 5]); let row_ids = drle - .entry_row_ids + .index_row_ids .get(&Some("world".to_string())) .unwrap() .to_vec(); assert_eq!(row_ids, vec![2]); let row_ids = drle - .entry_row_ids + .index_row_ids .get(&Some("zoo".to_string())) .unwrap() .to_vec(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f4a9d2c1fa..cf04671683 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -138,13 +138,14 @@ impl Segment { &self, name: &str, ) -> Option<&std::collections::BTreeMap, croaring::Bitmap>> { - if let Some(c) = self.column(name) { - return Some(c.group_by_ids()); - } - None + unimplemented!("just need to convert encoded keys into decoded strings"); + // if let Some(c) = self.column(name) { + // return Some(c.group_by_ids()); + // } + // None } - pub fn aggregate_by_groups( + pub fn aggregate_by_group_with_hash( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], @@ -164,6 +165,13 @@ impl Segment { return BTreeMap::new(); } let total_rows = &filtered_row_ids.cardinality(); + + // materialise the row ids we need to filter on as a vec. 
+ let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); // println!("TOTAL FILTERED ROWS {:?}", total_rows); // materialise all encoded values for the matching rows in the columns @@ -172,7 +180,9 @@ impl Segment { for group_column in group_columns { if let Some(column) = self.column(&group_column) { let encoded_values: Vec; - if let column::Vector::Integer(vector) = column.encoded_values(&filtered_row_ids) { + if let column::Vector::Integer(vector) = + column.encoded_values(&filtered_row_ids_vec) + { encoded_values = vector; } else { unimplemented!("currently you can only group on encoded string columns"); @@ -196,7 +206,7 @@ impl Segment { let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { if let Some(column) = self.column(&column_name) { - let decoded_values = column.values(&filtered_row_ids); + let decoded_values = column.values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, decoded_values.len() @@ -323,6 +333,235 @@ impl Segment { BTreeMap::new() } + pub fn aggregate_by_group_with_sort( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, Aggregate)], + ) -> BTreeMap, Vec<(String, Option)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + // println!("TOTAL FILTERED ROWS {:?}", total_rows); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values: delorean_table::Packer; + if let column::Vector::Integer(vector) = + column.encoded_values(&filtered_row_ids_vec) + { + encoded_values = delorean_table::Packer::from(vector); + } else { + unimplemented!("currently you can only group on encoded string columns"); + } + + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.num_rows() + ); + group_column_encoded_values.push(Some(encoded_values)); + } else { + group_column_encoded_values.push(None); + } + } + // println!("grouped columns {:?}", group_column_encoded_values); + + // TODO(edd): we could do this with an iterator I expect. + // + // materialise all decoded values for the rows in the columns we are + // aggregating on. 
+ let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); + for (column_name, _) in aggregates { + if let Some(column) = self.column(&column_name) { + let decoded_values = match column.values(&filtered_row_ids_vec) { + column::Vector::String(_) => unreachable!("not supported"), + column::Vector::Float(v) => delorean_table::Packers::from(v), + column::Vector::Integer(v) => delorean_table::Packers::from(v), + }; + + assert_eq!( + filtered_row_ids.cardinality() as usize, + decoded_values.num_rows() + ); + aggregate_column_decoded_values.push((column_name, Some(decoded_values))); + } else { + aggregate_column_decoded_values.push((column_name, None)); + } + } + + let mut all_columns = Vec::with_capacity( + group_column_encoded_values.len() + aggregate_column_decoded_values.len(), + ); + + for gc in group_column_encoded_values { + if let Some(p) = gc { + all_columns.push(delorean_table::Packers::Integer(p)); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + for ac in aggregate_column_decoded_values { + if let (_, Some(p)) = ac { + all_columns.push(p); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + // now sort on the first grouping columns. Right now the order doesn't matter... + let now = std::time::Instant::now(); + delorean_table::sorter::sort(&mut all_columns, &[0, 1]).unwrap(); + println!("time checking sort {:?}", now.elapsed()); + + let mut group_itrs = all_columns + .iter() + .take(group_columns.len()) // only use grouping columns + .map(|x| match x { + delorean_table::Packers::Integer(p) => p.iter(), + _ => { + panic!("not here {:?} ", x); + } + }) + .collect::>(); + + + + // now we have all the matching rows for each grouping column and each aggregation + // column. Materialised values for grouping are in encoded form. + // + // Next we sort all columns according to the group key. + // let mut all_columns = vec![]; + // for + // delorean_table::packers::Packers + // // First we will build a collection of iterators over the columns we + // // are grouping on. For columns that have no matching rows from the + // // filtering stage we will just emit None. + + // // Next we will build a collection of iterators over the columns we + // // are aggregating on. For columns that have no matching rows from the + // // filtering stage we will just emit None. 
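// [Editor's sketch] What the sort-based path above is building toward: once all
// columns are sorted by the (encoded) group key, aggregates fall out of one
// sequential pass that flushes a result whenever the key changes. Plain vectors
// stand in for the crate's Packers; single group column, single Sum aggregate.
fn sorted_group_sum(sorted_keys: &[i64], values: &[f64]) -> Vec<(i64, f64)> {
    let mut out = Vec::new();
    let mut iter = sorted_keys.iter().zip(values);
    let (mut current, mut sum) = match iter.next() {
        Some((&k, &v)) => (k, v),
        None => return out, // no rows, no groups
    };
    for (&k, &v) in iter {
        if k != current {
            out.push((current, sum)); // key changed: emit the finished group
            current = k;
            sum = 0.0;
        }
        sum += v;
    }
    out.push((current, sum)); // emit the final group
    out
}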
+ // let mut aggregate_itrs = aggregate_column_decoded_values + // .into_iter() + // .map(|(col_name, values)| match values { + // Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), + // None => (col_name.as_str(), None), + // }) + // .collect::>(); + + // let mut hash_table: HashMap< + // Vec>, + // Vec<(&String, &Aggregate, Option)>, + // > = HashMap::with_capacity(30000); + + let mut aggregate_row: Vec<(&str, Option)> = agg + + let mut processed_rows = 0; + let last_group_row: Vec> = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect(); + + while processed_rows < *total_rows { + // let group_row: Vec> = group_itrs + // .iter_mut() + // .map(|x| match x { + // Some(itr) => itr.next().unwrap(), + // None => None, + // }) + // .collect(); + + // check if group key has changed + for (&curr_v, itr) in last_group_row.iter().zip(&mut group_itrs) { + if curr_v != itr.next().unwrap() { + // group key changed + } + } + + // group key is the same - update aggregates + + // // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs + // // .iter_mut() + // // .map(|&mut (col_name, ref mut itr)| match itr { + // // Some(itr) => (col_name, itr.next()), + // // None => (col_name, None), + // // }) + // // .collect(); + + // // re-use aggregate_row vector. + // for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { + // match itr { + // Some(itr) => aggregate_row[i] = (col_name, itr.next()), + // None => aggregate_row[i] = (col_name, None), + // } + // } + + // // Lookup the group key in the hash map - if it's empty then insert + // // a place-holder for each aggregate being executed. + // let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { + // // TODO COULD BE MAP/COLLECT + // let mut agg_results: Vec<(&String, &Aggregate, Option)> = + // Vec::with_capacity(aggregates.len()); + // for (col_name, agg_type) in aggregates { + // agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option + // } + // agg_results + // }); + + // // Update aggregates - we process each row value and for each one + // // check which aggregates apply to it. + // // + // // TODO(edd): this is probably a bit of a perf suck. + // for (col_name, row_value) in &aggregate_row { + // for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in + // group_key_entry.iter_mut() + // { + // if col_name != cum_col_name { + // continue; + // } + + // // TODO(edd): remove unwrap - it should work because we are + // // tracking iteration count in loop. 
+ // let row_value = row_value.as_ref().unwrap(); + + // match cum_agg_value { + // Some(agg) => match agg { + // column::Aggregate::Count(cum_count) => { + // *cum_count += 1; + // } + // column::Aggregate::Sum(cum_sum) => { + // *cum_sum += row_value; + // } + // }, + // None => { + // *cum_agg_value = match agg_type { + // Aggregate::Count => Some(column::Aggregate::Count(0)), + // Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), + // } + // } + // } + // } + // } + processed_rows += 1; + } + // println!("{:?}", hash_table.len()); + BTreeMap::new() + } pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -442,11 +681,11 @@ impl Segment { bm } - pub fn group_agg_by_predicate_eq( + pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &Vec, + group_column: &String, aggregates: &Vec<(String, Aggregate)>, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let mut grouped_results = BTreeMap::new(); @@ -459,7 +698,7 @@ impl Segment { } } - if let Some(grouped_row_ids) = self.group_by_column_ids(&group_columns[0]) { + if let Some(grouped_row_ids) = self.group_by_column_ids(group_column) { for (group_key_value, row_ids) in grouped_row_ids.iter() { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { @@ -630,9 +869,43 @@ impl<'a> Segments<'a> { } for segment in &self.segments { - segment.aggregate_by_groups(time_range, predicates, &group_columns, &aggregates); + // // segment.aggregate_by_group_with_hash( + // // time_range, + // // predicates, + // // &group_columns, + // // &aggregates, + // // ); + + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + ); } + // let group_columns_arc = std::sync::Arc::new(group_columns); + // let aggregates_arc = std::sync::Arc::new(aggregates); + + // for chunked_segments in self.segments.chunks(12) { + // crossbeam::scope(|scope| { + // for segment in chunked_segments { + // let group_columns = group_columns_arc.clone(); + // let aggregates = aggregates_arc.clone(); + + // scope.spawn(move |_| { + // segment.aggregate_by_group_with_sort( + // time_range, + // predicates, + // &group_columns, + // &aggregates, + // ); + // }); + // } + // }) + // .unwrap(); + // } + let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = BTreeMap::new(); diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index fb0add52e7..a78be80bc0 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -76,9 +76,14 @@ pub fn sort(packers: &mut [Packers], sort_by: &[usize]) -> Result<(), Error> { if sorted { return Ok(()); } + // if packers_sorted_asc(packers, n, sort_by) { + // return Ok(()); + // } + // return Ok(()); } - + let now = std::time::Instant::now(); quicksort_by(packers, 0..n - 1, sort_by); + println!("sorted in {:?}", now.elapsed()); Ok(()) } @@ -152,9 +157,9 @@ fn cmp(packers: &[Packers], a: usize, b: usize, sort_by: &[usize]) -> Ordering { Packers::Integer(p) => { let cmp = p.get(a).cmp(&p.get(b)); if cmp != Ordering::Equal { - // if cmp equal then try next packer column. return cmp; } + // if cmp equal then try next packer column. 
} _ => continue, // don't compare on non-string / timestamp cols } @@ -162,6 +167,41 @@ fn cmp(packers: &[Packers], a: usize, b: usize, sort_by: &[usize]) -> Ordering { Ordering::Equal } +fn packers_sorted_asc(packers: &[Packers], len: usize, sort_by: &[usize]) -> bool { + 'row_wise: for i in 1..len { + for &idx in sort_by { + match &packers[idx] { + Packers::String(p) => { + let vec = p.values(); + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + Packers::Integer(p) => { + let vec = p.values(); + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + _ => continue, // don't compare on non-string / timestamp cols + } + } + } + true +} + // Swap the same pair of elements in each packer column fn swap(packers: &mut [Packers], a: usize, b: usize) { for p in packers { From a5a8667a424e428f3a9481c3e5a306b8c8b54b85 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 14 Aug 2020 11:25:48 +0100 Subject: [PATCH 24/73] feat: group by sorting --- Cargo.lock | 6 +- delorean_mem_qe/Cargo.toml | 5 +- delorean_mem_qe/src/bin/main.rs | 48 +++--- delorean_mem_qe/src/column.rs | 100 +++++++++-- delorean_mem_qe/src/encoding.rs | 1 - delorean_mem_qe/src/lib.rs | 1 + delorean_mem_qe/src/segment.rs | 290 +++++++++++++------------------- delorean_mem_qe/src/sorter.rs | 197 ++++++++++++++++++++++ 8 files changed, 436 insertions(+), 212 deletions(-) create mode 100644 delorean_mem_qe/src/sorter.rs diff --git a/Cargo.lock b/Cargo.lock index 4ccedecd78..fad93f1f3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -769,6 +769,8 @@ dependencies = [ "croaring", "crossbeam", "delorean_table", + "env_logger", + "log", "snafu", ] @@ -1596,9 +1598,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.8" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" dependencies = [ "cfg-if", ] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 7f8ac255a3..89b9ff1e9a 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -7,9 +7,6 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[profile.release] -debug = true - [dependencies] delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } @@ -17,6 +14,8 @@ snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" chrono = "0.4" +log = "0.4.11" +env_logger = "0.7.1" [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index aa3bbe4654..fa97b6e702 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -24,6 +24,8 @@ pub enum Error { } fn main() { + env_logger::init(); + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); let reader = ipc::reader::StreamReader::try_new(r).unwrap(); @@ -36,7 +38,7 @@ fn main() { store.size(), ); - // time_group_by_agg(&store); + time_group_by_agg(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -107,20 
+109,20 @@ fn main() { // println!("ROWS ({}) {:?}", v, v.len()); // } - loop { - let now = std::time::Instant::now(); - let segments = store.segments(); - let groups = segments.read_group_eq( - (0, 1890040790000000), - &[], - vec!["env".to_string(), "role".to_string()], - vec![ - ("counter".to_string(), Aggregate::Sum), - // ("counter".to_string(), Aggregate::Count), - ], - ); - println!("{:?} {:?}", groups, now.elapsed()); - } + // loop { + // let now = std::time::Instant::now(); + // let segments = store.segments(); + // let groups = segments.read_group_eq( + // (0, 1590044410000000), + // &[], + // vec!["env".to_string(), "role".to_string()], + // vec![ + // ("counter".to_string(), Aggregate::Sum), + // // ("counter".to_string(), Aggregate::Count), + // ], + // ); + // println!("{:?} {:?}", groups, now.elapsed()); + // } // loop { // let mut total_count = 0.0; @@ -159,12 +161,12 @@ fn build_store( mut reader: arrow::ipc::reader::StreamReader, store: &mut Store, ) -> Result<(), Error> { - let mut i = 0; + // let mut i = 0; while let Some(rb) = reader.next_batch().unwrap() { - if i < 363 { - i += 1; - continue; - } + // if i < 363 { + // i += 1; + // continue; + // } let segment = convert_record_batch(rb)?; store.add_segment(segment); } @@ -402,7 +404,7 @@ fn time_row_by_preds(store: &Store) { } fn time_group_by_agg(store: &Store) { - let repeat = 100; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -411,8 +413,8 @@ fn time_group_by_agg(store: &Store) { let groups = segments.read_group_eq( (0, 1590044410000000), - &[("method", Some(&column::Scalar::String("GET")))], - vec!["env".to_string(), "status".to_string()], + &[], + vec!["status".to_string(), "method".to_string()], vec![ ("counter".to_string(), Aggregate::Sum), // ("counter".to_string(), Aggregate::Count), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 08d95165b5..144c2bd6ec 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -9,6 +9,44 @@ pub enum Scalar<'a> { Integer(i64), } +impl<'a> Scalar<'a> { + pub fn reset(&mut self) { + match self { + Scalar::String(s) => { + panic!("not supported"); + } + Scalar::Float(v) => { + *v = 0.0; + } + Scalar::Integer(v) => { + *v = 0; + } + } + } + + pub fn add(&mut self, other: Scalar<'a>) { + match self { + Self::Float(v) => { + if let Self::Float(other) = other { + *v += other; + } else { + panic!("invalid"); + }; + } + Self::Integer(v) => { + if let Self::Integer(other) = other { + *v += other; + } else { + panic!("invalid"); + }; + } + Self::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { type Output = Scalar<'a>; @@ -65,6 +103,30 @@ pub enum Aggregate<'a> { Sum(Scalar<'a>), } +impl<'a> Aggregate<'a> { + pub fn update_with(&mut self, other: Scalar<'a>) { + match self { + Self::Count(v) => { + *v = *v + 1; + } + Self::Sum(v) => { + v.add(other); + } + } + } +} + +impl<'a> std::ops::Add> for Aggregate<'a> { + type Output = Aggregate<'a>; + + fn add(self, _rhs: Scalar<'a>) -> Self::Output { + match self { + Self::Count(c) => Self::Count(c + 1), + Self::Sum(s) => Self::Sum(s + &_rhs), + } + } +} + impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { type Output = Aggregate<'a>; @@ -170,17 +232,31 @@ impl<'a> Vector<'a> { } } } + + pub fn swap(&mut self, a: usize, b: usize) { + match self { + Self::String(v) => { + v.swap(a, b); + } + 
Self::Float(v) => { + v.swap(a, b); + } + Self::Integer(v) => { + v.swap(a, b); + } + } + } } /// VectorIterator allows a `Vector` to be iterated. Until vectors are drained /// Scalar values are emitted. pub struct VectorIterator<'a> { - v: Vector<'a>, + v: &'a Vector<'a>, next_i: usize, } impl<'a> VectorIterator<'a> { - pub fn new(v: Vector<'a>) -> Self { + pub fn new(v: &'a Vector<'a>) -> Self { Self { v, next_i: 0 } } } @@ -290,7 +366,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.values(row_ids); - println!("time getting decoded values for float {:?}", now.elapsed()); + log::debug!("time getting decoded values for float {:?}", now.elapsed()); Vector::Float(v) } @@ -301,7 +377,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.values(row_ids); - println!("time getting decoded values for int {:?}", now.elapsed()); + log::debug!("time getting decoded values for int {:?}", now.elapsed()); Vector::Integer(v) } } @@ -359,7 +435,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - println!("time unpacking bitmap {:?}", now.elapsed()); + log::debug!("time unpacking bitmap {:?}", now.elapsed()); match self { Column::String(c) => { @@ -369,7 +445,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids_vec); - println!("time getting encoded values {:?}", now.elapsed()); + log::debug!("time getting encoded values {:?}", now.elapsed()); Vector::Integer(v) } Column::Float(c) => { @@ -400,7 +476,9 @@ impl Column { let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids); - println!("time getting encoded values {:?}", now.elapsed()); + log::debug!("time getting encoded values {:?}", now.elapsed()); + + log::debug!("dictionary {:?}", c.data.dictionary()); Vector::Integer(v) } Column::Float(c) => { @@ -448,7 +526,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - println!("time unpacking bitmap {:?}", now.elapsed()); + log::debug!("time unpacking bitmap {:?}", now.elapsed()); assert!( row_ids_vec.len() == 1 || row_ids_vec[row_ids_vec.len() - 1] > row_ids_vec[0], @@ -1060,9 +1138,11 @@ pub mod metadata { pub fn maybe_contains_value(&self, v: f64) -> bool { let res = self.range.0 <= v && v <= self.range.1; - println!( + log::debug!( "column with ({:?}) maybe contain {:?} -- {:?}", - self.range, v, res + self.range, + v, + res ); res } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index de7090115c..4cec3b25f6 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -297,7 +297,6 @@ impl DictionaryRLE { _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta } - _self } diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 26b78d9963..150e5ad588 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,6 +1,7 @@ pub mod column; pub mod encoding; pub mod segment; +pub mod sorter; use segment::{Segment, Segments}; diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index cf04671683..8e4189982d 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -238,7 +238,7 @@ impl Segment { // are aggregating on. For columns that have no matching rows from the // filtering stage we will just emit None. 
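// --------------------------------------------------------------------------
// A minimal sketch (for illustration; not part of this patch) of the
// hash-based variant implemented here: group keys are rows of encoded column
// values, and each key maps to a running (count, sum) accumulator. Types are
// simplified to i64 keys and f64 values.
use std::collections::HashMap;

fn hash_group(keys: &[Vec<i64>], values: &[f64]) -> HashMap<Vec<i64>, (u64, f64)> {
    let mut table: HashMap<Vec<i64>, (u64, f64)> = HashMap::new();
    for (key, &v) in keys.iter().zip(values) {
        // `entry` inserts a zeroed accumulator the first time a key is seen.
        let agg = table.entry(key.clone()).or_insert((0, 0.0));
        agg.0 += 1;
        agg.1 += v;
    }
    table
}
// --------------------------------------------------------------------------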
let mut aggregate_itrs = aggregate_column_decoded_values - .into_iter() + .iter() .map(|(col_name, values)| match values { Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), None => (col_name.as_str(), None), @@ -329,7 +329,7 @@ impl Segment { } processed_rows += 1; } - // println!("{:?}", hash_table.len()); + log::debug!("{:?}", hash_table); BTreeMap::new() } @@ -339,7 +339,7 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, Aggregate)], - ) -> BTreeMap, Vec<(String, Option)>> { + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -361,24 +361,17 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values: delorean_table::Packer; - if let column::Vector::Integer(vector) = - column.encoded_values(&filtered_row_ids_vec) - { - encoded_values = delorean_table::Packer::from(vector); - } else { - unimplemented!("currently you can only group on encoded string columns"); - } - + let encoded_values = column.encoded_values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, - encoded_values.num_rows() + encoded_values.len() ); group_column_encoded_values.push(Some(encoded_values)); } else { group_column_encoded_values.push(None); } } + let group_col_sort_order = &(0..group_columns.len()).collect::>(); // println!("grouped columns {:?}", group_column_encoded_values); // TODO(edd): we could do this with an iterator I expect. @@ -388,15 +381,10 @@ impl Segment { let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { if let Some(column) = self.column(&column_name) { - let decoded_values = match column.values(&filtered_row_ids_vec) { - column::Vector::String(_) => unreachable!("not supported"), - column::Vector::Float(v) => delorean_table::Packers::from(v), - column::Vector::Integer(v) => delorean_table::Packers::from(v), - }; - + let decoded_values = column.values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, - decoded_values.num_rows() + decoded_values.len() ); aggregate_column_decoded_values.push((column_name, Some(decoded_values))); } else { @@ -410,7 +398,7 @@ impl Segment { for gc in group_column_encoded_values { if let Some(p) = gc { - all_columns.push(delorean_table::Packers::Integer(p)); + all_columns.push(p); } else { panic!("need to handle no results for filtering/grouping..."); } @@ -426,142 +414,106 @@ impl Segment { // now sort on the first grouping columns. Right now the order doesn't matter... 
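// Illustration (added for clarity; the values are invented): with group
// columns [status, method] encoded as integers, sorting every column by
// indexes [0, 1] clusters identical group keys together:
//   status: [2, 1, 2, 1]  ->  [1, 1, 2, 2]
//   method: [0, 0, 1, 0]  ->  [0, 0, 0, 1]
// Aggregate columns are co-sorted by the same row permutation, so a single
// forward scan can accumulate per-group aggregates.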
let now = std::time::Instant::now(); - delorean_table::sorter::sort(&mut all_columns, &[0, 1]).unwrap(); - println!("time checking sort {:?}", now.elapsed()); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + log::debug!("time checking sort {:?}", now.elapsed()); let mut group_itrs = all_columns .iter() .take(group_columns.len()) // only use grouping columns - .map(|x| match x { - delorean_table::Packers::Integer(p) => p.iter(), - _ => { - panic!("not here {:?} ", x); + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); } }) .collect::>(); + let mut aggregate_itrs = all_columns + .iter() + .skip(group_columns.len()) // only use grouping columns + .map(|v| column::VectorIterator::new(v)) + .collect::>(); + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect::>(); - // now we have all the matching rows for each grouping column and each aggregation - // column. Materialised values for grouping are in encoded form. - // - // Next we sort all columns according to the group key. - // let mut all_columns = vec![]; - // for - // delorean_table::packers::Packers - // // First we will build a collection of iterators over the columns we - // // are grouping on. For columns that have no matching rows from the - // // filtering stage we will just emit None. + let mut curr_group_row = last_group_row.clone(); - // // Next we will build a collection of iterators over the columns we - // // are aggregating on. For columns that have no matching rows from the - // // filtering stage we will just emit None. - // let mut aggregate_itrs = aggregate_column_decoded_values - // .into_iter() - // .map(|(col_name, values)| match values { - // Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), - // None => (col_name.as_str(), None), - // }) - // .collect::>(); - - // let mut hash_table: HashMap< - // Vec>, - // Vec<(&String, &Aggregate, Option)>, - // > = HashMap::with_capacity(30000); - - let mut aggregate_row: Vec<(&str, Option)> = agg - - let mut processed_rows = 0; - let last_group_row: Vec> = group_itrs + // this tracks the last row for each column we are aggregating. + let last_agg_row: Vec = aggregate_itrs .iter_mut() .map(|itr| itr.next().unwrap()) .collect(); - while processed_rows < *total_rows { - // let group_row: Vec> = group_itrs - // .iter_mut() - // .map(|x| match x { - // Some(itr) => itr.next().unwrap(), - // None => None, - // }) - // .collect(); + // this keeps the current cumulative aggregates for the columns we + // are aggregating. + let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates + .iter() + .zip(last_agg_row.iter()) + .map(|((col_name, agg_type), curr_agg)| { + let agg = match agg_type { + Aggregate::Count => column::Aggregate::Count(1), + Aggregate::Sum => column::Aggregate::Sum(curr_agg.clone()), + }; + (col_name.clone(), agg) + }) + .collect(); - // check if group key has changed - for (&curr_v, itr) in last_group_row.iter().zip(&mut group_itrs) { - if curr_v != itr.next().unwrap() { - // group key changed + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + while processed_rows < *total_rows { + // update next group key. 
+ let mut group_key_changed = false; + for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { + let next_v = itr.next().unwrap(); + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. + if group_key_changed { + let key = last_group_row.clone(); + results.insert(key, cum_aggregates.clone()); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset cumulative aggregates + for (_, agg) in cum_aggregates.iter_mut() { + match agg { + column::Aggregate::Count(c) => { + *c = 0; + } + column::Aggregate::Sum(s) => s.reset(), + } } } - // group key is the same - update aggregates + // update aggregates + for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { + let (_, curr_agg) = bind.0; + let next_value = bind.1.next().unwrap(); + curr_agg.update_with(next_value); + } - // // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs - // // .iter_mut() - // // .map(|&mut (col_name, ref mut itr)| match itr { - // // Some(itr) => (col_name, itr.next()), - // // None => (col_name, None), - // // }) - // // .collect(); - - // // re-use aggregate_row vector. - // for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { - // match itr { - // Some(itr) => aggregate_row[i] = (col_name, itr.next()), - // None => aggregate_row[i] = (col_name, None), - // } - // } - - // // Lookup the group key in the hash map - if it's empty then insert - // // a place-holder for each aggregate being executed. - // let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { - // // TODO COULD BE MAP/COLLECT - // let mut agg_results: Vec<(&String, &Aggregate, Option)> = - // Vec::with_capacity(aggregates.len()); - // for (col_name, agg_type) in aggregates { - // agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option - // } - // agg_results - // }); - - // // Update aggregates - we process each row value and for each one - // // check which aggregates apply to it. - // // - // // TODO(edd): this is probably a bit of a perf suck. - // for (col_name, row_value) in &aggregate_row { - // for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in - // group_key_entry.iter_mut() - // { - // if col_name != cum_col_name { - // continue; - // } - - // // TODO(edd): remove unwrap - it should work because we are - // // tracking iteration count in loop. - // let row_value = row_value.as_ref().unwrap(); - - // match cum_agg_value { - // Some(agg) => match agg { - // column::Aggregate::Count(cum_count) => { - // *cum_count += 1; - // } - // column::Aggregate::Sum(cum_sum) => { - // *cum_sum += row_value; - // } - // }, - // None => { - // *cum_agg_value = match agg_type { - // Aggregate::Count => Some(column::Aggregate::Count(0)), - // Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), - // } - // } - // } - // } - // } processed_rows += 1; } - // println!("{:?}", hash_table.len()); + + // Emit final row + results.insert(last_group_row, cum_aggregates); + + log::debug!("{:?}", results); + // results BTreeMap::new() } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -734,7 +686,7 @@ impl Segment { } else { // In this case there are grouped values in the column with no // rows falling into time-range/predicate set. 
- println!( + log::error!( "grouped value {:?} has no rows in time-range/predicate set", group_key_value ); @@ -742,7 +694,7 @@ impl Segment { } } else { // segment doesn't have the column so can't group on it. - println!("don't have column - can't group"); + log::error!("don't have column - can't group"); } grouped_results } @@ -868,26 +820,36 @@ impl<'a> Segments<'a> { panic!("max <= min"); } + // + // TODO - just need to sum up the aggregates within each segment here to get + // the final result. + // for segment in &self.segments { - // // segment.aggregate_by_group_with_hash( - // // time_range, - // // predicates, - // // &group_columns, - // // &aggregates, - // // ); - - segment.aggregate_by_group_with_sort( + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_hash( time_range, predicates, &group_columns, &aggregates, ); + + // segment.aggregate_by_group_with_sort( + // time_range, + // predicates, + // &group_columns, + // &aggregates, + // ); + log::info!( + "processed segment {:?} in {:?}", + segment.time_range(), + now.elapsed() + ) } // let group_columns_arc = std::sync::Arc::new(group_columns); // let aggregates_arc = std::sync::Arc::new(aggregates); - // for chunked_segments in self.segments.chunks(12) { + // for chunked_segments in self.segments.chunks(16) { // crossbeam::scope(|scope| { // for segment in chunked_segments { // let group_columns = group_columns_arc.clone(); @@ -906,38 +868,20 @@ impl<'a> Segments<'a> { // .unwrap(); // } + // let rem = self.segments.len() % 16; + // for segment in &self.segments[self.segments.len() - rem..] { + // segment.aggregate_by_group_with_sort( + // time_range, + // predicates, + // &group_columns_arc.clone(), + // &aggregates_arc.clone(), + // ); + // } + + // TODO(edd): merge results - not expensive really... let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = BTreeMap::new(); - // for segment in &self.segments { - // let segment_results = segment.group_agg_by_predicate_eq( - // time_range, - // predicates, - // &group_columns, - // &aggregates, - // ); - - // for (k, segment_aggs) in segment_results { - // // assert_eq!(v.len(), aggregates.len()); - // let cum_result = cum_results.get_mut(&k); - // match cum_result { - // Some(cum) => { - // assert_eq!(cum.len(), segment_aggs.len()); - // // In this case we need to aggregate the aggregates from - // // each segment. - // for i in 0..cum.len() { - // // TODO(edd): this is more expensive than necessary - // cum[i] = (cum[i].0.clone(), cum[i].1.clone() + &segment_aggs[i].1); - // } - // } - // None => { - // cum_results.insert(k, segment_aggs); - // } - // } - // } - // } - - // // columns cum_results } diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs new file mode 100644 index 0000000000..dff6d9657b --- /dev/null +++ b/delorean_mem_qe/src/sorter.rs @@ -0,0 +1,197 @@ +//! The sorter module provides a sort function which will sort a collection of +//! `Packer` columns by arbitrary columns. All sorting is done in ascending +//! order. +//! +//! `sorter::sort` implements Quicksort using Hoare's partitioning scheme (how +//! you choose the pivot). This partitioning scheme typically significantly +//! reduces the number of swaps necessary but it does have some drawbacks. +//! +//! Firstly, the worse case runtime of this implementation is `O(n^2)` when the +//! input set of columns are sorted according to the desired sort order. To +//! avoid that behaviour, a heuristic is used for inputs over a certain size; +//! 
large inputs are first linearly scanned to determine if the input is already
+//! sorted.
+//!
+//! Secondly, the sort produced using this partitioning scheme is not stable.
+//!
+use std::cmp::Ordering;
+use std::collections::BTreeSet;
+use std::ops::Range;
+
+use snafu::ensure;
+use snafu::Snafu;
+
+use super::column;
+
+#[derive(Snafu, Debug, Clone, Copy, PartialEq)]
+pub enum Error {
+    #[snafu(display(r#"Too many sort columns specified"#))]
+    TooManyColumns,
+
+    #[snafu(display(r#"Same column specified as sort column multiple times"#))]
+    RepeatedColumns { index: usize },
+
+    #[snafu(display(r#"Specified column index is out of bounds"#))]
+    OutOfBoundsColumn { index: usize },
+}
+
+/// Any set of `Vector` columns with more than this many rows will have a
+/// linear comparison scan performed on them to ensure they're not already
+/// sorted.
+const SORTED_CHECK_SIZE: usize = 1000;
+
+/// Sort a slice of `column::Vector`s based on the provided column indexes.
+///
+/// All chosen columns will be sorted in ascending order; the sort is *not*
+/// stable.
+pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Error> {
+    if vectors.is_empty() || sort_by.is_empty() {
+        return Ok(());
+    }
+
+    ensure!(sort_by.len() <= vectors.len(), TooManyColumns);
+
+    let mut col_set = BTreeSet::new();
+    for &index in sort_by {
+        ensure!(col_set.insert(index), RepeatedColumns { index });
+    }
+
+    // TODO(edd): map first/last still unstable https://github.com/rust-lang/rust/issues/62924
+    if let Some(index) = col_set.range(vectors.len()..).next() {
+        return OutOfBoundsColumn { index: *index }.fail();
+    }
+
+    // Hoare's partitioning scheme can have quadratic runtime behaviour in
+    // the worst case when the inputs are already sorted. To avoid this, a
+    // check is added for large inputs.
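// Usage sketch (added for illustration; assumes `Vector::Integer` wraps a
// `Vec<i64>`, as its use elsewhere in this module suggests):
//
//   let mut cols = vec![
//       column::Vector::Integer(vec![2, 1, 1]),   // sort key 0
//       column::Vector::Integer(vec![9, 8, 7]),   // sort key 1
//       column::Vector::Integer(vec![5, 6, 4]),   // payload, co-sorted
//   ];
//   sort(&mut cols, &[0, 1]).unwrap();
//   // cols[0] is now [1, 1, 2]; rows move together across all columns.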
+ let n = vectors[0].len(); + if n > SORTED_CHECK_SIZE { + let mut sorted = true; + for i in 1..n { + if cmp(vectors, i - 1, i, sort_by) == Ordering::Greater { + sorted = false; + break; + } + } + + if sorted { + log::debug!("columns already sorted"); + return Ok(()); + } + // if vectors_sorted_asc(vectors, n, sort_by) { + // return Ok(()); + // } + } + let now = std::time::Instant::now(); + quicksort_by(vectors, 0..n - 1, sort_by); + log::debug!("sorted in {:?}", now.elapsed()); + Ok(()) +} + +fn quicksort_by(vectors: &mut [column::Vector], range: Range, sort_by: &[usize]) { + if range.start >= range.end { + return; + } + + let pivot = partition(vectors, &range, sort_by); + quicksort_by(vectors, range.start..pivot, sort_by); + quicksort_by(vectors, pivot + 1..range.end, sort_by); +} + +fn partition(vectors: &mut [column::Vector], range: &Range, sort_by: &[usize]) -> usize { + let pivot = (range.start + range.end) / 2; + let (lo, hi) = (range.start, range.end); + if cmp(vectors, pivot as usize, lo as usize, sort_by) == Ordering::Less { + swap(vectors, lo as usize, pivot as usize); + } + if cmp(vectors, hi as usize, lo as usize, sort_by) == Ordering::Less { + swap(vectors, lo as usize, hi as usize); + } + if cmp(vectors, pivot as usize, hi as usize, sort_by) == Ordering::Less { + swap(vectors, hi as usize, pivot as usize); + } + + let pivot = hi; + let mut i = range.start; + let mut j = range.end; + + loop { + while cmp(vectors, i as usize, pivot as usize, sort_by) == Ordering::Less { + i += 1; + } + + while cmp(vectors, j as usize, pivot as usize, sort_by) == Ordering::Greater { + j -= 1; + } + + if i >= j { + return j; + } + + swap(vectors, i as usize, j as usize); + i += 1; + j -= 1; + } +} + +fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ordering { + for &idx in sort_by { + match &vectors[idx] { + column::Vector::String(p) => { + let cmp = p.get(a).cmp(&p.get(b)); + if cmp != Ordering::Equal { + return cmp; + } + // if cmp equal then try next vector. + } + column::Vector::Integer(p) => { + let cmp = p.get(a).cmp(&p.get(b)); + if cmp != Ordering::Equal { + return cmp; + } + // if cmp equal then try next vector. 
+ } + _ => continue, // don't compare on non-string / timestamp cols + } + } + Ordering::Equal +} + +fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool { + 'row_wise: for i in 1..len { + for &idx in sort_by { + match &vectors[idx] { + column::Vector::String(vec) => { + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + column::Vector::Integer(vec) => { + if vec[i - 1] < vec[i] { + continue 'row_wise; + } else if vec[i - 1] == vec[i] { + // try next column + continue; + } else { + // value is > so + return false; + } + } + _ => continue, // don't compare on non-string / timestamp cols + } + } + } + true +} + +// Swap the same pair of elements in each packer column +fn swap(vectors: &mut [column::Vector], a: usize, b: usize) { + for p in vectors { + p.swap(a, b); + } +} From 44aaddb8bd2c727f3e737f411d53ed9fad11c19a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 14 Aug 2020 13:27:53 +0100 Subject: [PATCH 25/73] refactor: tidy up query runs --- delorean_mem_qe/src/bin/main.rs | 347 ++++++++++++++++++++------------ delorean_mem_qe/src/segment.rs | 252 ++++++++++++++++++----- 2 files changed, 414 insertions(+), 185 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index fa97b6e702..d4ee92cc6f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -5,7 +5,7 @@ use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; use delorean_mem_qe::column::{Column, Scalar}; -use delorean_mem_qe::segment::{Aggregate, Segment}; +use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; use delorean_mem_qe::Store; // use snafu::ensure; @@ -38,7 +38,13 @@ fn main() { store.size(), ); - time_group_by_agg(&store); + time_select_with_pred(&store); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_SORTED_count(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -270,165 +276,242 @@ fn convert_record_batch(rb: RecordBatch) -> Result { Ok(segment) } -fn time_column_min_time(store: &Store) { - let repeat = 1000; +// +// SELECT FIRST(host) FROM measurement +// +fn time_first_host(store: &Store) { + let repeat = 100; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_min = 0; - for _ in 1..repeat { - let now = std::time::Instant::now(); - let segments = store.segments(); - let min = segments.column_min("time").unwrap(); - total_time += now.elapsed(); - - if let Scalar::Integer(v) = min { - total_min += v - } - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_min - ); -} - -fn time_column_max_time(store: &Store) { - let repeat = 1000; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; - for _ in 1..repeat { - let now = std::time::Instant::now(); - let segments = store.segments(); - let max = segments.column_max("time").unwrap(); - total_time += now.elapsed(); - - if let Scalar::Integer(v) = max { - total_max += v - } - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); -} - -fn time_column_first(store: &Store) { - let repeat = 100000; - let mut total_time: std::time::Duration = 
std::time::Duration::new(0, 0); - let mut total_max = 0; - for _ in 1..repeat { - let now = std::time::Instant::now(); - let segments = store.segments(); - let res = segments.first("host").unwrap(); - total_time += now.elapsed(); - total_max += res.0; - } - println!( - "Ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); -} - -// fn time_row_by_last_ts(store: &Store) { -// let repeat = 100000; -// let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); -// let mut total_max = 0; -// let segments = store.segments(); -// for _ in 0..repeat { -// let now = std::time::Instant::now(); - -// let (_, _, row_id) = segments.last("time").unwrap(); -// let res = segments.segments().last().unwrap().row(row_id).unwrap(); -// total_time += now.elapsed(); -// total_max += res.len(); -// } -// println!( -// "Ran {:?} in {:?} {:?} / call {:?}", -// repeat, -// total_time, -// total_time / repeat, -// total_max -// ); -// } - -fn time_row_by_preds(store: &Store) { - let repeat = 100000; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; + let mut track = 0; let segments = store.segments(); for _ in 0..repeat { let now = std::time::Instant::now(); - let rows = segments - .segments() - .last() - .unwrap() - .filter_by_predicates_eq( - (1590040770000000, 1590040790000000), - &vec![ - ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - ("method", Some(&column::Scalar::String("GET"))), - ( - "host", - Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - ), - ], - ) - .unwrap(); - - // for row_id in rows.iter() { - // println!( - // "{:?} - {:?}", - // row_id, - // segments.segments().last().unwrap().row(row_id as usize) - // ); - // } + let (ts, _, _) = segments.first("host").unwrap(); total_time += now.elapsed(); - total_max += rows.cardinality(); + track += ts; } println!( - "Ran {:?} in {:?} {:?} / call {:?}", + "time_first_host ran {:?} in {:?} {:?} / call {:?}", repeat, total_time, total_time / repeat, - total_max + track ); } -fn time_group_by_agg(store: &Store) { - let repeat = 10; +// +// SELECT SUM(counter) FROM measurement +// WHERE time >= "2020-05-07 06:48:00" AND time < "2020-05-21 07:00:10" +// +fn time_sum_range(store: &Store) { + let repeat = 100; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; + let segments = store.segments(); + let mut track = 0.0; + for _ in 0..repeat { + let now = std::time::Instant::now(); + + for segment in segments.segments() { + let filtered_ids = + segment.filter_by_predicates_eq((1588834080000000, 1590044410000000), &[]); + if let Some(mut row_ids) = filtered_ids { + if let column::Scalar::Float(v) = + segment.sum_column("counter", &mut row_ids).unwrap() + { + track += v; + } + } + } + + total_time += now.elapsed(); + } + println!( + "time_sum_range ran {:?} in {:?} {:?} / total {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + +// +// SELECT COUNT(counter) FROM measurement +// WHERE time >= "2020-05-07 06:48:00" AND time < "2020-05-21 07:00:10" +// +fn time_count_range(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; let segments = store.segments(); for _ in 0..repeat { let now = std::time::Instant::now(); - let groups = segments.read_group_eq( - (0, 1590044410000000), - &[], - vec!["status".to_string(), "method".to_string()], + for segment in segments.segments() { + let 
filtered_ids = + segment.filter_by_predicates_eq((1588834080000000, 1590044410000000), &[]); + if let Some(mut row_ids) = filtered_ids { + track += segment.count_column("counter", &mut row_ids).unwrap(); + } + } + + total_time += now.elapsed(); + } + println!( + "time_count_range ran {:?} in {:?} {:?} / total {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + +// +// SELECT env, method, host, counter, time +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// AND "env" = "prod01-eu-central-1" +// +fn time_select_with_pred(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let columns = segments.read_filter_eq( + (1590036110000000, 1590040770000000), + &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], vec![ - ("counter".to_string(), Aggregate::Sum), - // ("counter".to_string(), Aggregate::Count), + "env".to_string(), + "method".to_string(), + "host".to_string(), + "counter".to_string(), + "time".to_string(), ], ); total_time += now.elapsed(); - total_max += groups.len(); + track += columns.len(); } println!( - "Ran {:?} in {:?} {:?} / call {:?}", + "time_select_with_pred ran {:?} in {:?} {:?} / call {:?}", repeat, total_time, total_time / repeat, - total_max + track ); } + +// +// SELECT env, method, host, counter, time +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// AND "env" = "prod01-eu-central-1" +// +fn time_group_single_with_pred(store: &Store) { + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + for segment in segments.segments() { + let results = segment.group_single_agg_by_predicate_eq( + (1588834080000000, 1590044410000000), + &[], + &"env".to_string(), + &vec![("counter".to_string(), Aggregate::Count)], + ); + track += results.len(); + } + + total_time += now.elapsed(); + } + println!( + "time_group_single_with_pred ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + +fn time_group_by_multi_agg_count(store: &Store) { + let strats = vec![ + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::SortGroup, + GroupingStrategy::SortGroupConcurrent, + ]; + + for strat in &strats { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["status".to_string(), "method".to_string()], + vec![("counter".to_string(), Aggregate::Count)], + strat, + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_group_by_multi_agg_count_{:?} ran {:?} in {:?} {:?} / call {:?}", + strat, + repeat, + total_time, + total_time / repeat, + total_max + ); + } +} + +fn time_group_by_multi_agg_SORTED_count(store: &Store) { + let strats = vec![ + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::SortGroup, + GroupingStrategy::SortGroupConcurrent, + ]; + + for strat in &strats { + let repeat = 10; + let mut total_time: 
std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["env".to_string(), "role".to_string()], + vec![("counter".to_string(), Aggregate::Count)], + strat, + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_group_by_multi_agg_SORTED_count_{:?} ran {:?} in {:?} {:?} / call {:?}", + strat, + repeat, + total_time, + total_time / repeat, + total_max + ); + } +} diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 8e4189982d..8f6c29b812 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -3,6 +3,9 @@ use std::collections::{BTreeMap, HashMap}; use super::column; use super::column::Column; +// Only used in a couple of specific places for experimentation. +const THREADS: usize = 16; + #[derive(Debug)] pub struct Segment { meta: SegmentMetaData, @@ -137,12 +140,11 @@ impl Segment { pub fn group_by_column_ids( &self, name: &str, - ) -> Option<&std::collections::BTreeMap, croaring::Bitmap>> { - unimplemented!("just need to convert encoded keys into decoded strings"); - // if let Some(c) = self.column(name) { - // return Some(c.group_by_ids()); - // } - // None + ) -> Option<&std::collections::BTreeMap> { + if let Some(c) = self.column(name) { + return Some(c.group_by_ids()); + } + None } pub fn aggregate_by_group_with_hash( @@ -639,7 +641,7 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_column: &String, aggregates: &Vec<(String, Aggregate)>, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -682,7 +684,7 @@ impl Segment { // Next add these aggregates to the result set, keyed // by the grouped value. assert_eq!(aggs.len(), aggregates.len()); - grouped_results.insert(vec![group_key_value.clone().unwrap()], aggs); + grouped_results.insert(*group_key_value, aggs); } else { // In this case there are grouped values in the column with no // rows falling into time-range/predicate set. @@ -814,16 +816,118 @@ impl<'a> Segments<'a> { predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, aggregates: Vec<(String, Aggregate)>, + strategy: &GroupingStrategy, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); } - // - // TODO - just need to sum up the aggregates within each segment here to get - // the final result. - // + match strategy { + GroupingStrategy::HashGroup => { + return self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + false, + ) + } + GroupingStrategy::HashGroupConcurrent => { + return self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + true, + ) + } + GroupingStrategy::SortGroup => { + return self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + false, + ) + } + GroupingStrategy::SortGroupConcurrent => { + return self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + true, + ) + } + } + + // TODO(edd): merge results - not expensive really... 
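// Note (added): every arm of the `match strategy` above returns, so the code
// below is unreachable as written; a later patch comments it out.
//
// --------------------------------------------------------------------------
// A minimal sketch (for illustration; not part of this patch) of the merge
// the TODO describes, assuming per-segment results keyed by encoded group
// rows with a single COUNT aggregate:
use std::collections::BTreeMap;

fn merge_counts(
    mut acc: BTreeMap<Vec<i64>, u64>,
    segment: BTreeMap<Vec<i64>, u64>,
) -> BTreeMap<Vec<i64>, u64> {
    for (key, count) in segment {
        // sum this segment's count for the group into the running total
        *acc.entry(key).or_insert(0) += count;
    }
    acc
}
// --------------------------------------------------------------------------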
+ let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + BTreeMap::new(); + + cum_results + } + + fn read_group_eq_hash( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, Aggregate)>, + concurrent: bool, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + if concurrent { + let group_columns_arc = std::sync::Arc::new(group_columns); + let aggregates_arc = std::sync::Arc::new(aggregates); + + for chunked_segments in self.segments.chunks(THREADS) { + crossbeam::scope(|scope| { + for segment in chunked_segments { + let group_columns = group_columns_arc.clone(); + let aggregates = aggregates_arc.clone(); + + scope.spawn(move |_| { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_hash( + time_range, + predicates, + &group_columns, + &aggregates, + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + }); + } + }) + .unwrap(); + } + + let rem = self.segments.len() % THREADS; + for segment in &self.segments[self.segments.len() - rem..] { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_hash( + time_range, + predicates, + &group_columns_arc.clone(), + &aggregates_arc.clone(), + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + } + + // TODO(edd): aggregate the aggregates. not expensive + return BTreeMap::new(); + } + + // Single threaded + for segment in &self.segments { let now = std::time::Instant::now(); segment.aggregate_by_group_with_hash( @@ -832,57 +936,91 @@ impl<'a> Segments<'a> { &group_columns, &aggregates, ); - - // segment.aggregate_by_group_with_sort( - // time_range, - // predicates, - // &group_columns, - // &aggregates, - // ); log::info!( - "processed segment {:?} in {:?}", + "processed segment {:?} using single-threaded hash-grouping in {:?}", segment.time_range(), now.elapsed() ) } - // let group_columns_arc = std::sync::Arc::new(group_columns); - // let aggregates_arc = std::sync::Arc::new(aggregates); + BTreeMap::new() + } - // for chunked_segments in self.segments.chunks(16) { - // crossbeam::scope(|scope| { - // for segment in chunked_segments { - // let group_columns = group_columns_arc.clone(); - // let aggregates = aggregates_arc.clone(); + fn read_group_eq_sort( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, Aggregate)>, + concurrent: bool, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + if concurrent { + let group_columns_arc = std::sync::Arc::new(group_columns); + let aggregates_arc = std::sync::Arc::new(aggregates); - // scope.spawn(move |_| { - // segment.aggregate_by_group_with_sort( - // time_range, - // predicates, - // &group_columns, - // &aggregates, - // ); - // }); - // } - // }) - // .unwrap(); - // } + for chunked_segments in self.segments.chunks(THREADS) { + crossbeam::scope(|scope| { + for segment in chunked_segments { + let group_columns = group_columns_arc.clone(); + let aggregates = aggregates_arc.clone(); - // let rem = self.segments.len() % 16; - // for segment in &self.segments[self.segments.len() - rem..] 
{ - // segment.aggregate_by_group_with_sort( - // time_range, - // predicates, - // &group_columns_arc.clone(), - // &aggregates_arc.clone(), - // ); - // } + scope.spawn(move |_| { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + }); + } + }) + .unwrap(); + } - // TODO(edd): merge results - not expensive really... - let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - BTreeMap::new(); + let rem = self.segments.len() % THREADS; + for segment in &self.segments[self.segments.len() - rem..] { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns_arc.clone(), + &aggregates_arc.clone(), + ); + log::info!( + "processed segment {:?} using multi-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + } - cum_results + // TODO(edd): aggregate the aggregates. not expensive + return BTreeMap::new(); + } + + // Single threaded + + for segment in &self.segments { + let now = std::time::Instant::now(); + segment.aggregate_by_group_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + ); + log::info!( + "processed segment {:?} using single-threaded hash-grouping in {:?}", + segment.time_range(), + now.elapsed() + ) + } + + BTreeMap::new() } /// Returns the minimum value for a column in a set of segments. @@ -936,7 +1074,7 @@ impl<'a> Segments<'a> { /// /// TODO(edd): could return NULL value.. pub fn first(&self, column_name: &str) -> Option<(i64, Option, usize)> { - // First let's find the segment with the latest time range. + // First let's find the segment with the earliest time range. // notice we order a < b on max time range. 
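// Sketch (added for illustration; assumes `time_range()` returns a
// `(min, max)` pair): the same selection written with `min_by_key`, ordering
// on the range maximum as the comment above describes:
//
//   let earliest = self.segments.iter().min_by_key(|s| s.time_range().1);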
let segment = self .segments @@ -992,5 +1130,13 @@ impl<'a> Segments<'a> { } } +#[derive(Debug)] +pub enum GroupingStrategy { + HashGroup, + HashGroupConcurrent, + SortGroup, + SortGroupConcurrent, +} + #[cfg(test)] mod test {} From 7f815099d051b562d08a4dbd69d3a82f210995b2 Mon Sep 17 00:00:00 2001 From: alamb Date: Tue, 18 Aug 2020 14:10:18 -0400 Subject: [PATCH 26/73] feat: Read from parquet rather than arrow --- Cargo.lock | 1 + delorean_mem_qe/Cargo.toml | 4 ++-- delorean_mem_qe/src/bin/main.rs | 26 +++++++++++++++++--------- delorean_mem_qe/src/column.rs | 12 ++++++------ delorean_mem_qe/src/encoding.rs | 2 +- delorean_mem_qe/src/segment.rs | 6 +++--- delorean_mem_qe/src/sorter.rs | 1 + delorean_table/src/sorter.rs | 1 + 8 files changed, 32 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fad93f1f3e..53b07ad87f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -771,6 +771,7 @@ dependencies = [ "delorean_table", "env_logger", "log", + "parquet 2.0.0-SNAPSHOT", "snafu", ] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 89b9ff1e9a..d569025cef 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -9,7 +9,8 @@ edition = "2018" [dependencies] delorean_table = { path = "../delorean_table" } -arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" @@ -18,4 +19,3 @@ log = "0.4.11" env_logger = "0.7.1" [dev-dependencies] - diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d4ee92cc6f..9ee787ac8c 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,12 +1,13 @@ -use std::{fs::File, path::Path}; +use std::{fs::File, rc::Rc}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; -use arrow::{array, array::Array, datatypes, ipc}; +use arrow::{array, array::Array, datatypes}; use delorean_mem_qe::column; -use delorean_mem_qe::column::{Column, Scalar}; +use delorean_mem_qe::column::{Column}; use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; use delorean_mem_qe::Store; +use parquet::arrow::arrow_reader::ArrowReader; // use snafu::ensure; use snafu::Snafu; @@ -26,11 +27,18 @@ pub enum Error { fn main() { env_logger::init(); - let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + println!("Opening the file...."); + let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet").unwrap(); + let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); + let mut reader = parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let batch_size = 60000; + let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); + + //let reader = ipc::reader::StreamReader::try_new(r).unwrap(); let mut store = 
Store::default(); - build_store(reader, &mut store).unwrap(); + build_store(record_batch_reader, &mut store).unwrap(); println!( "total segments {:?} with total size {:?}", @@ -44,7 +52,7 @@ fn main() { time_count_range(&store); time_group_single_with_pred(&store); time_group_by_multi_agg_count(&store); - time_group_by_multi_agg_SORTED_count(&store); + time_group_by_multi_agg_sorted_count(&store); // time_column_min_time(&store); // time_column_max_time(&store); @@ -164,7 +172,7 @@ fn main() { } fn build_store( - mut reader: arrow::ipc::reader::StreamReader, + mut reader: impl RecordBatchReader, store: &mut Store, ) -> Result<(), Error> { // let mut i = 0; @@ -478,7 +486,7 @@ fn time_group_by_multi_agg_count(store: &Store) { } } -fn time_group_by_multi_agg_SORTED_count(store: &Store) { +fn time_group_by_multi_agg_sorted_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, GroupingStrategy::HashGroupConcurrent, diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 144c2bd6ec..e8bd8bd700 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -12,7 +12,7 @@ pub enum Scalar<'a> { impl<'a> Scalar<'a> { pub fn reset(&mut self) { match self { - Scalar::String(s) => { + Scalar::String(_s) => { panic!("not supported"); } Scalar::Float(v) => { @@ -509,10 +509,10 @@ impl Column { // FIX THIS UNWRAP AND HOPE THERE ARE NO NULL VALUES! c.decode_id(encoded_id).unwrap() } - Column::Float(c) => { + Column::Float(_c) => { unreachable!("this isn't supported right now"); } - Column::Integer(c) => { + Column::Integer(_c) => { unreachable!("this isn't supported right now"); } } @@ -542,7 +542,7 @@ impl Column { } /// materialise all rows including and after row_id - pub fn scan_from(&self, row_id: usize) -> Option { + pub fn scan_from(&self, _row_id: usize) -> Option { unimplemented!("todo"); // if row_id >= self.num_rows() { // println!( @@ -764,7 +764,7 @@ impl Column { // WHERE counter >= 102.2 AND counter < 2929.32 pub fn row_ids_gte_lt(&self, low: &Scalar, high: &Scalar) -> Option { match self { - Column::String(c) => { + Column::String(_c) => { unimplemented!("not implemented yet"); } Column::Float(c) => { @@ -926,7 +926,7 @@ impl String { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, row_id: usize) -> Option<&std::string::String> { + pub fn scan_from_until_some(&self, _row_id: usize) -> Option<&std::string::String> { unreachable!("don't need this"); // self.data.scan_from_until_some(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 4cec3b25f6..6d9ce4e4f8 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -68,7 +68,7 @@ where } // TODO(edd): fix this when added NULL support - pub fn scan_from_until_some(&self, row_id: usize) -> Option { + pub fn scan_from_until_some(&self, _row_id: usize) -> Option { unreachable!("to remove"); // for v in self.values.iter().skip(row_id) { // return Some(*v); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 8f6c29b812..2bbbdea2f9 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -863,10 +863,10 @@ impl<'a> Segments<'a> { } // TODO(edd): merge results - not expensive really... 
- let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - BTreeMap::new(); + // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + // BTreeMap::new(); - cum_results + // cum_results } fn read_group_eq_hash( diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index dff6d9657b..c8b01a3432 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -156,6 +156,7 @@ fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ord Ordering::Equal } +#[allow(dead_code)] fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool { 'row_wise: for i in 1..len { for &idx in sort_by { diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index a78be80bc0..7911b1b310 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -167,6 +167,7 @@ fn cmp(packers: &[Packers], a: usize, b: usize, sort_by: &[usize]) -> Ordering { Ordering::Equal } +#[allow(dead_code)] fn packers_sorted_asc(packers: &[Packers], len: usize, sort_by: &[usize]) -> bool { 'row_wise: for i in 1..len { for &idx in sort_by { From acfef35a0e71a90cbe1d6aba9da2cec917ac9f90 Mon Sep 17 00:00:00 2001 From: alamb Date: Wed, 19 Aug 2020 12:55:09 -0400 Subject: [PATCH 27/73] feat: load segments from parquet --- Cargo.lock | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53b07ad87f..ec0e71d15c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -653,7 +653,7 @@ dependencies = [ "crossbeam", "fnv", "num_cpus", - "parquet", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "paste", "rustyline", "sqlparser", @@ -716,7 +716,7 @@ version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "datafusion", - "parquet", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", ] [[package]] @@ -771,7 +771,7 @@ dependencies = [ "delorean_table", "env_logger", "log", - "parquet 2.0.0-SNAPSHOT", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "snafu", ] @@ -2048,6 +2048,25 @@ dependencies = [ "zstd", ] +[[package]] +name = "parquet" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "brotli", + "byteorder", + "chrono", + "flate2", + "lz4", + "num-bigint 0.3.0", + "parquet-format", + "serde_json", + "snap", + "thrift", + "zstd", +] + [[package]] name = "parquet-format" version = "2.6.1" From 820277a52983b1c168cacb1332354c9b70e2b532 Mon Sep 17 00:00:00 2001 From: alamb Date: Wed, 19 Aug 2020 12:55:09 -0400 Subject: [PATCH 28/73] feat: load segments from parquet --- Cargo.lock | 19 ++++++++++ delorean_mem_qe/Cargo.toml | 9 ++++- delorean_mem_qe/src/bin/main.rs | 66 +++++++++++++++++++++++++-------- delorean_mem_qe/src/column.rs | 2 +- delorean_mem_qe/src/lib.rs | 3 +- delorean_mem_qe/src/segment.rs | 1 + 6 files changed, 80 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec0e71d15c..d2c35bfd59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,6 +125,24 @@ dependencies = [ 
"serde_json", ] +[[package]] +name = "arrow" +version = "2.0.0-SNAPSHOT" +dependencies = [ + "chrono", + "csv", + "flatbuffers", + "hex", + "indexmap", + "lazy_static", + "num 0.3.0", + "rand", + "regex", + "serde", + "serde_derive", + "serde_json", +] + [[package]] name = "arrow-flight" version = "2.0.0-SNAPSHOT" @@ -770,6 +788,7 @@ dependencies = [ "crossbeam", "delorean_table", "env_logger", + "human_format", "log", "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", "snafu", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index d569025cef..e5024b7932 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -9,13 +9,18 @@ edition = "2018" [dependencies] delorean_table = { path = "../delorean_table" } -arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +#arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } +#parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" chrono = "0.4" log = "0.4.11" env_logger = "0.7.1" +human_format = "1.0.3" + + [dev-dependencies] diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9ee787ac8c..02d9752db3 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,4 +1,4 @@ -use std::{fs::File, rc::Rc}; +use std::{fs, fs::File, rc::Rc, path::PathBuf}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes}; @@ -11,6 +11,7 @@ use parquet::arrow::arrow_reader::ArrowReader; // use snafu::ensure; use snafu::Snafu; +use datatypes::TimeUnit; #[derive(Snafu, Debug, Clone, Copy, PartialEq)] pub enum Error { @@ -24,12 +25,23 @@ pub enum Error { // OutOfBoundsColumn { index: usize }, } +fn format_size(sz: usize) -> String { + human_format::Formatter::new().format(sz as f64) +} + + fn main() { env_logger::init(); //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - println!("Opening the file...."); - let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet").unwrap(); + // This one was having issues being read into arrow with the last row groups + let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); + let r = File::open(&path).unwrap(); + let file_size = fs::metadata(&path).expect("read metadata").len(); + println!("Reading {} ({}) bytes of parquet from {:?}....", + format_size(file_size as usize), file_size, path); + + //let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000095062-000000006/http_api_requests_total.parquet").unwrap(); let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); let mut reader = 
parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); let batch_size = 60000; @@ -41,9 +53,10 @@ fn main() { build_store(record_batch_reader, &mut store).unwrap(); println!( - "total segments {:?} with total size {:?}", + "total segments {:?} with total size {} ({})", store.segment_total(), - store.size(), + format_size(store.size()), + store.size() ); time_select_with_pred(&store); @@ -175,22 +188,34 @@ fn build_store( mut reader: impl RecordBatchReader, store: &mut Store, ) -> Result<(), Error> { - // let mut i = 0; - while let Some(rb) = reader.next_batch().unwrap() { - // if i < 363 { - // i += 1; - // continue; - // } - let segment = convert_record_batch(rb)?; - store.add_segment(segment); + let mut total_rows_read = 0; + let start = std::time::Instant::now(); + loop { + let rb = reader.next_batch(); + match rb { + Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), + Ok(Some(rb)) => { + // if i < 363 { + // i += 1; + // continue; + // } + total_rows_read += rb.num_rows(); + let segment = convert_record_batch(rb)?; + store.add_segment(segment); + }, + Ok(None) => { + let now = std::time::Instant::now(); + println!("Completed loading {} rows in {:?}", total_rows_read, now - start); + return Ok(()) + } + } } - Ok(()) } fn convert_record_batch(rb: RecordBatch) -> Result { let mut segment = Segment::new(rb.num_rows()); - // println!("cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); + println!("Loading record batch: cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); for (i, column) in rb.columns().iter().enumerate() { match *column.data_type() { datatypes::DataType::Float64 => { @@ -214,6 +239,15 @@ fn convert_record_batch(rb: RecordBatch) -> Result { let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); } + datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => { + if column.null_count() > 0 { + panic!("null times"); + } + let arr = column.as_any().downcast_ref::().unwrap(); + + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), column); + } datatypes::DataType::Utf8 => { let arr = column .as_any() @@ -278,7 +312,7 @@ fn convert_record_batch(rb: RecordBatch) -> Result { datatypes::DataType::Boolean => { panic!("unsupported"); } - _ => panic!("unsupported datatype"), + ref d @ _ => panic!("unsupported datatype: {:?}", d), } } Ok(segment) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index e8bd8bd700..a2ac7e83c2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -310,7 +310,7 @@ impl Column { } } - // Returns the size of the segment in bytes. + // Returns the size of the column in bytes. pub fn size(&self) -> usize { match self { Column::String(c) => c.size(), diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 150e5ad588..5fa99ebb1a 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -9,6 +9,7 @@ use segment::{Segment, Segments}; pub struct Store { segments: Vec, + /// Total size of the store, in bytes store_size: usize, } @@ -18,7 +19,7 @@ impl Store { self.segments.push(segment); } - /// The total size of all segments in the store.s + /// The total size of all segments in the store, in bytes. 
pub fn size(&self) -> usize { self.store_size } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 2bbbdea2f9..079b8b3dcd 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -71,6 +71,7 @@ impl Segment { } // TODO - iterator.... + /// Returns the size of the segment in bytes. pub fn size(&self) -> usize { let mut size = 0; for c in &self.columns { From 41899203d9e8ba21a555965b81cd4c946f35a4a9 Mon Sep 17 00:00:00 2001 From: alamb Date: Wed, 19 Aug 2020 13:52:27 -0400 Subject: [PATCH 29/73] refactor: implement a prototype datafusion integration layer demonstration --- Cargo.lock | 2 + delorean_mem_qe/Cargo.toml | 2 + delorean_mem_qe/src/adapter.rs | 333 ++++++++++++++++++++++++++++++++ delorean_mem_qe/src/bin/main.rs | 95 +++++++-- delorean_mem_qe/src/lib.rs | 11 ++ delorean_mem_qe/src/segment.rs | 17 +- 6 files changed, 436 insertions(+), 24 deletions(-) create mode 100644 delorean_mem_qe/src/adapter.rs diff --git a/Cargo.lock b/Cargo.lock index d2c35bfd59..e25b863d24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,6 +136,7 @@ dependencies = [ "indexmap", "lazy_static", "num 0.3.0", + "prettytable-rs", "rand", "regex", "serde", @@ -786,6 +787,7 @@ dependencies = [ "chrono", "croaring", "crossbeam", + "datafusion", "delorean_table", "env_logger", "human_format", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index e5024b7932..1b79c94649 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -13,6 +13,8 @@ delorean_table = { path = "../delorean_table" } arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } #parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } +#datafusion = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } +datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs new file mode 100644 index 0000000000..90138cbd29 --- /dev/null +++ b/delorean_mem_qe/src/adapter.rs @@ -0,0 +1,333 @@ +//! 
Code for interfacing and running queries in DataFusion
+
+use crate::Store;
+use arrow::{
+    datatypes::{Schema, SchemaRef},
+    util::pretty, record_batch::{RecordBatch, RecordBatchReader},
+};
+use datafusion::prelude::*;
+use datafusion::{
+    datasource::TableProvider,
+    execution::{
+        context::ExecutionContextState,
+        physical_plan::{ExecutionPlan, Partition, common::RecordBatchIterator},
+    },
+    logicalplan::{make_logical_plan_node, Expr, LogicalPlan},
+    lp::LogicalPlanNode,
+    optimizer::utils,
+};
+
+use std::{
+    fmt,
+    sync::{Arc, Mutex},
+};
+use crate::column;
+
+/// Wrapper to adapt a Store to a DataFusion "TableProvider" --
+/// eventually we could also implement this directly on Store
+pub struct StoreTableSource {
+    store: Arc<Store>,
+}
+
+impl<'a> StoreTableSource {
+    pub fn new(store: Arc<Store>) -> Self {
+        Self { store }
+    }
+}
+
+impl TableProvider for StoreTableSource {
+    /// Get a reference to the schema for this table
+    fn schema(&self) -> SchemaRef {
+        self.store.schema()
+    }
+
+    /// Perform a scan of a table and return a sequence of iterators over the data (one
+    /// iterator per partition)
+    fn scan(
+        &self,
+        _projection: &Option<Vec<usize>>,
+        _batch_size: usize,
+    ) -> datafusion::error::Result<Vec<Arc<dyn Partition>>> {
+        unimplemented!("scan not yet implemented");
+    }
+}
+
+/// Prototype of what a Delorean query engine built on top of
+/// DataFusion, but using specialized column store operators, might
+/// look like.
+///
+/// Data from the Segments in the `store` is visible in DataFusion
+/// as a table ("measurement") in this prototype.
+pub struct DeloreanQueryEngine {
+    ctx: ExecutionContext,
+    store: Arc<Store>,
+}
+
+impl DeloreanQueryEngine {
+    pub fn new(store: Arc<Store>) -> Self {
+        let start = std::time::Instant::now();
+        let mut ctx = ExecutionContext::new();
+        let source = StoreTableSource::new(store.clone());
+        let source = Box::new(source);
+        ctx.register_table("measurement", source);
+        println!("Completed setup in {:?}", start.elapsed());
+        DeloreanQueryEngine { ctx, store }
+    }
+
+    // Run the specified SQL and return the number of records matched
+    pub fn run_sql(&mut self, sql: &str) -> usize {
+        let plan = self
+            .ctx
+            .create_logical_plan(sql)
+            .expect("Creating the logical plan");
+
+        //println!("Created logical plan:\n{:?}", plan);
+        let plan = self.rewrite_to_segment_scan(&plan);
+        //println!("Rewritten logical plan:\n{:?}", plan);
+
+        match self.ctx.collect_plan(&plan) {
+            Err(err) => {
+                println!("Error running query: {:?}", err);
+                0
+            }
+            Ok(results) => {
+                if results.is_empty() {
+                    //println!("Empty result returned");
+                    0
+                } else {
+                    pretty::print_batches(&results).expect("printing");
+                    results.iter().map(|b| b.num_rows()).sum()
+                }
+            }
+        }
+    }
+
+    /// Specialized optimizer pass that combines a `TableScan` and a `Filter`
+    /// together into a SegmentScan carrying the predicates.
+    ///
+    /// For example, given this input:
+    ///
+    /// Projection: #env, #method, #host, #counter, #time
+    ///   Filter: #time GtEq Int64(1590036110000000)
+    ///     TableScan: measurement projection=None
+    ///
+    /// The following plan would be produced
+    /// Projection: #env, #method, #host, #counter, #time
+    ///   SegmentScan: measurement projection=None predicate=: #time GtEq Int64(1590036110000000)
+    ///
+    fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan {
+        if let LogicalPlan::Filter { predicate, input } = plan {
+            // see if the input is a TableScan
+            if let LogicalPlan::TableScan { ..
} = **input
+            {
+                return make_logical_plan_node(Box::new(SegmentScan::new(
+                    self.store.clone(),
+                    predicate.clone(),
+                )));
+            }
+        }
+
+        // otherwise recursively apply
+        let optimized_inputs = utils::inputs(&plan)
+            .iter()
+            .map(|input| self.rewrite_to_segment_scan(input))
+            .collect();
+
+        return utils::from_plan(plan, &utils::expressions(plan), &optimized_inputs)
+            .expect("Created plan");
+    }
+}
+
+/// LogicalPlan node that serves as a scan of the segment store with optional predicates
+struct SegmentScan {
+    /// The underlying Store
+    store: Arc<Store>,
+
+    schema: SchemaRef,
+
+    /// The predicate to apply during the scan
+    predicate: Expr,
+}
+
+impl SegmentScan {
+    fn new(store: Arc<Store>, predicate: Expr) -> Self {
+        let schema = store.schema().clone();
+
+        SegmentScan {
+            store,
+            schema,
+            predicate,
+        }
+    }
+
+}
+
+impl LogicalPlanNode for SegmentScan {
+    /// Return a reference to the logical plan's inputs
+    fn inputs(&self) -> Vec<&LogicalPlan> {
+        Vec::new()
+    }
+
+    /// Get a reference to the logical plan's schema
+    fn schema(&self) -> &Schema {
+        self.schema.as_ref()
+    }
+
+    /// Returns all expressions (non-recursively) in the current logical plan node.
+    fn expressions(&self) -> Vec<Expr> {
+        // The predicate expression gets absorbed by this node. As
+        // there are no inputs, there are no exprs that operate on
+        // inputs
+        Vec::new()
+    }
+
+    /// Write a single line human readable string to `f` for use in explain plan
+    fn format_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "SegmentScan: {:?} predicate {:?}",
+            self.store.as_ref() as *const Store,
+            self.predicate
+        )
+    }
+
+    /// Create a clone of this node.
+    ///
+    /// Note std::Clone needs a Sized type, so we must implement a
+    /// clone that creates a node with a known Size (i.e. Box)
+    //
+    fn dyn_clone(&self) -> Box<dyn LogicalPlanNode> {
+        Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone()))
+    }
+
+    /// Create a clone of this LogicalPlanNode with inputs and expressions replaced.
+    ///
+    /// Note that exprs and inputs are in the same order as the result
+    /// of self.inputs and self.exprs.
+    ///
+    /// So, clone_from_template(exprs).exprs() == exprs
+    fn clone_from_template(
+        &self,
+        exprs: &Vec<Expr>,
+        inputs: &Vec<LogicalPlan>,
+    ) -> Box<dyn LogicalPlanNode> {
+        assert_eq!(exprs.len(), 0, "no exprs expected");
+        assert_eq!(inputs.len(), 0, "no inputs expected");
+        Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone()))
+    }
+
+    /// Create the corresponding physical plan for this node
+    fn create_physical_plan(
+        &self,
+        input_physical_plans: Vec<Arc<dyn ExecutionPlan>>,
+        _ctx_state: Arc<Mutex<ExecutionContextState>>,
+    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
+        assert_eq!(input_physical_plans.len(), 0, "Can not have inputs");
+
+        // If this were real code, we would now programmatically
+        // transform the DataFusion Expr into the specific form needed
+        // by the Segment. 
However, to save prototype time we just + // hard code it here instead + assert_eq!( + format!("{:?}", self.predicate), + "CAST(#time AS Int64) GtEq Int64(1590036110000000) And CAST(#time AS Int64) Lt Int64(1590040770000000) And #env Eq Utf8(\"prod01-eu-central-1\")" + ); + + let time_range = (1590036110000000, 1590040770000000); + let string_predicate = StringPredicate { + col_name: "env".into(), + value: "prod01-eu-central-1".into() + }; + + Ok(Arc::new(SegmentScanExec::new( + self.store.clone(), + time_range, + string_predicate, + ))) + } + +} + + +#[derive(Debug, Clone)] +struct StringPredicate { + col_name: String, + value: String, +} + +/// StoreScan execution node +#[derive(Debug)] +pub struct SegmentScanExec { + store: Arc, + + // Specialized predicates to apply + time_range: (i64, i64), + string_predicate: StringPredicate + +} + +impl SegmentScanExec { + fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { + SegmentScanExec { store , time_range, string_predicate } + } +} + +impl ExecutionPlan for SegmentScanExec { + fn schema(&self) -> SchemaRef { + self.store.schema() + } + + fn partitions(&self) -> datafusion::error::Result>> { + let store = self.store.clone(); + Ok(vec![Arc::new(SegmentPartition{ + store, + time_range: self.time_range, + string_predicate: self.string_predicate.clone(), + })]) + } +} + +#[derive(Debug)] +struct SegmentPartition { + store: Arc, + time_range: (i64, i64), + string_predicate: StringPredicate, + +} + +impl Partition for SegmentPartition { + fn execute(&self) -> datafusion::error::Result>> { + let combined_results: Vec> = vec![]; + + let segments = self.store.segments(); + + // prepare the string predicates in the manner Segments want them + let col_name = &self.string_predicate.col_name; + let scalar = column::Scalar::String(&self.string_predicate.value); + + + // Here + let _columns = segments.read_filter_eq( + self.time_range, + &[(col_name, Some(&scalar))], + vec![ + "env".to_string(), + "method".to_string(), + "host".to_string(), + "counter".to_string(), + "time".to_string(), + ], + ); + + // If we were implementing this for real, we would not convert + // `columns` into RecordBatches and feed them back out + + + Ok(Arc::new(Mutex::new(RecordBatchIterator::new( + self.store.schema().clone(), + combined_results, + )))) + + + } +} diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 02d9752db3..d371cf87e1 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,17 +1,17 @@ -use std::{fs, fs::File, rc::Rc, path::PathBuf}; +use std::{fs, fs::File, path::PathBuf, rc::Rc, sync::Arc}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes}; use delorean_mem_qe::column; -use delorean_mem_qe::column::{Column}; +use delorean_mem_qe::column::Column; use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; -use delorean_mem_qe::Store; +use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; use parquet::arrow::arrow_reader::ArrowReader; // use snafu::ensure; -use snafu::Snafu; use datatypes::TimeUnit; +use snafu::Snafu; #[derive(Snafu, Debug, Clone, Copy, PartialEq)] pub enum Error { @@ -29,21 +29,29 @@ fn format_size(sz: usize) -> String { human_format::Formatter::new().format(sz as f64) } - fn main() { env_logger::init(); //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - // This one was having 
issues being read into arrow with the last row groups - let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); + + //let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); + + // smaller file to test with + let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000068644-000000002/http_api_requests_total.parquet"); + let r = File::open(&path).unwrap(); let file_size = fs::metadata(&path).expect("read metadata").len(); - println!("Reading {} ({}) bytes of parquet from {:?}....", - format_size(file_size as usize), file_size, path); + println!( + "Reading {} ({}) bytes of parquet from {:?}....", + format_size(file_size as usize), + file_size, + path + ); //let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000095062-000000006/http_api_requests_total.parquet").unwrap(); let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); - let mut reader = parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let mut reader = + parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); let batch_size = 60000; let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); @@ -58,8 +66,10 @@ fn main() { format_size(store.size()), store.size() ); + let store = Arc::new(store); time_select_with_pred(&store); + time_datafusion_select_with_pred(store.clone()); time_first_host(&store); time_sum_range(&store); time_count_range(&store); @@ -184,10 +194,7 @@ fn main() { // } } -fn build_store( - mut reader: impl RecordBatchReader, - store: &mut Store, -) -> Result<(), Error> { +fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); loop { @@ -202,20 +209,28 @@ fn build_store( total_rows_read += rb.num_rows(); let segment = convert_record_batch(rb)?; store.add_segment(segment); - }, + } Ok(None) => { let now = std::time::Instant::now(); - println!("Completed loading {} rows in {:?}", total_rows_read, now - start); - return Ok(()) + println!( + "Completed loading {} rows in {:?}", + total_rows_read, + now - start + ); + return Ok(()); } } } } fn convert_record_batch(rb: RecordBatch) -> Result { - let mut segment = Segment::new(rb.num_rows()); + let mut segment = Segment::new(rb.num_rows(), rb.schema().clone()); - println!("Loading record batch: cols {:?} rows {:?}", rb.num_columns(), rb.num_rows()); + println!( + "Loading record batch: cols {:?} rows {:?}", + rb.num_columns(), + rb.num_rows() + ); for (i, column) in rb.columns().iter().enumerate() { match *column.data_type() { datatypes::DataType::Float64 => { @@ -243,7 +258,10 @@ fn convert_record_batch(rb: RecordBatch) -> Result { if column.null_count() > 0 { panic!("null times"); } - let arr = column.as_any().downcast_ref::().unwrap(); + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); @@ -414,7 +432,6 @@ fn time_count_range(store: &Store) { // FROM measurement // WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" // AND "env" = "prod01-eu-central-1" -// fn time_select_with_pred(store: &Store) { let 
repeat = 100; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); @@ -447,6 +464,42 @@ fn time_select_with_pred(store: &Store) { ); } +/// DataFusion implementation of +// +// SELECT env, method, host, counter, time +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// AND "env" = "prod01-eu-central-1" +// +// Use the hard coded timestamp values 1590036110000000, 1590040770000000 + +fn time_datafusion_select_with_pred(store: Arc) { + let mut query_engine = DeloreanQueryEngine::new(store); + + let sql_string = r#"SELECT env, method, host, counter, time + FROM measurement + WHERE time::BIGINT >= 1590036110000000 + AND time::BIGINT < 1590040770000000 + AND env = 'prod01-eu-central-1' + "#; + + let repeat = 100; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + for _ in 0..repeat { + let now = std::time::Instant::now(); + track += query_engine.run_sql(&sql_string); + total_time += now.elapsed(); + } + println!( + "time_datafusion_select_with_pred ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + // // SELECT env, method, host, counter, time // FROM measurement diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 5fa99ebb1a..c8617e1cf0 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,8 +1,10 @@ +pub mod adapter; pub mod column; pub mod encoding; pub mod segment; pub mod sorter; +use arrow::datatypes::SchemaRef; use segment::{Segment, Segments}; #[derive(Debug, Default)] @@ -31,4 +33,13 @@ impl Store { pub fn segments(&self) -> Segments { Segments::new(self.segments.iter().collect::>()) } + + pub fn schema(&self) -> SchemaRef { + assert!( + !self.segments.is_empty(), + "Need to have at least one segment in a store" + ); + // assume all segments have the same schema + self.segments[0].schema() + } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 079b8b3dcd..8dee09d5f4 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap}; use super::column; use super::column::Column; +use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. 
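// NOTE(editor): a rough sketch, not from this patch, of the fan-out shape that
// the THREADS constant below caps. It assumes crossbeam's scoped threads are
// what drive it (crossbeam is already a dependency); each thread works on a
// disjoint chunk of the input, and `work` is a hypothetical per-chunk job.
fn fan_out<T: Sync>(items: &[T], threads: usize, work: impl Fn(&[T]) + Sync) {
    // ceil-divide so every item lands in exactly one chunk
    let chunk_size = ((items.len() + threads - 1) / threads.max(1)).max(1);
    let work = &work;
    crossbeam::scope(|scope| {
        for part in items.chunks(chunk_size) {
            // each scoped thread processes its own slice
            scope.spawn(move |_| work(part));
        }
    })
    .unwrap();
}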
const THREADS: usize = 16; @@ -16,9 +17,9 @@ pub struct Segment { } impl Segment { - pub fn new(rows: usize) -> Self { + pub fn new(rows: usize, schema: SchemaRef) -> Self { Self { - meta: SegmentMetaData::new(rows), + meta: SegmentMetaData::new(rows, schema), columns: vec![], time_column_idx: 0, } @@ -44,6 +45,10 @@ impl Segment { self.meta.time_range } + pub fn schema(&self) -> SchemaRef { + self.meta.schema() + } + pub fn add_column(&mut self, name: &str, c: column::Column) { assert_eq!( self.meta.rows, @@ -709,6 +714,7 @@ impl Segment { pub struct SegmentMetaData { size: usize, // TODO rows: usize, + schema: SchemaRef, column_names: Vec, time_range: (i64, i64), @@ -719,10 +725,11 @@ pub struct SegmentMetaData { } impl SegmentMetaData { - pub fn new(rows: usize) -> Self { + pub fn new(rows: usize, schema: SchemaRef) -> Self { let mut meta = Self { size: 0, rows, + schema, column_names: vec![], time_range: (0, 0), row_ids: croaring::Bitmap::create_with_capacity(rows as u32), @@ -731,6 +738,10 @@ impl SegmentMetaData { meta } + pub fn schema(&self) -> SchemaRef { + self.schema.clone() + } + pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { self.time_range.0 <= to && from <= self.time_range.1 } From 54e9d38589d57cf3ad212c9e1f35803d94fbbd90 Mon Sep 17 00:00:00 2001 From: alamb Date: Thu, 20 Aug 2020 20:51:26 -0400 Subject: [PATCH 30/73] chore: update the refs to github --- Cargo.lock | 3 ++- delorean_mem_qe/Cargo.toml | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e25b863d24..ed5f18fdfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,7 +109,6 @@ name = "arrow" version = "2.0.0-SNAPSHOT" source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" dependencies = [ - "arrow-flight", "chrono", "csv", "flatbuffers", @@ -128,7 +127,9 @@ dependencies = [ [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" dependencies = [ + "arrow-flight", "chrono", "csv", "flatbuffers", diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 1b79c94649..c2086ac66f 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -9,12 +9,12 @@ edition = "2018" [dependencies] delorean_table = { path = "../delorean_table" } -#arrow = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } -#parquet = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } -#datafusion = { git = "https://github.com/apache/arrow.git", rev="aa6889a74c57d6faea0d27ea8013d9b0c7ef809a", version = "2.0.0-SNAPSHOT" } -datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } +arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } +parquet = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } +datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } +#arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } +#parquet = { path = 
"/Users/alamb/Software/arrow/rust/parquet" } +#datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" From 957ff79e2f76c15fda19ac3a77785b3c0432bde8 Mon Sep 17 00:00:00 2001 From: alamb Date: Fri, 21 Aug 2020 11:04:03 -0400 Subject: [PATCH 31/73] docs: add additional documentation for sets of test parameters --- delorean_mem_qe/src/adapter.rs | 35 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs index 90138cbd29..0fcb5cb34a 100644 --- a/delorean_mem_qe/src/adapter.rs +++ b/delorean_mem_qe/src/adapter.rs @@ -3,25 +3,26 @@ use crate::Store; use arrow::{ datatypes::{Schema, SchemaRef}, - util::pretty, record_batch::{RecordBatch, RecordBatchReader}, + record_batch::{RecordBatch, RecordBatchReader}, + util::pretty, }; use datafusion::prelude::*; use datafusion::{ datasource::TableProvider, execution::{ context::ExecutionContextState, - physical_plan::{ExecutionPlan, Partition, common::RecordBatchIterator}, + physical_plan::{common::RecordBatchIterator, ExecutionPlan, Partition}, }, logicalplan::{make_logical_plan_node, Expr, LogicalPlan}, lp::LogicalPlanNode, optimizer::utils, }; +use crate::column; use std::{ fmt, sync::{Arc, Mutex}, }; -use crate::column; /// Wrapper to adapt a Store to a DataFusion "TableProvider" -- /// eventually we could also implement this directly on Store @@ -118,8 +119,7 @@ impl DeloreanQueryEngine { fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan { if let LogicalPlan::Filter { predicate, input } = plan { // see if the input is a TableScan - if let LogicalPlan::TableScan { .. } = **input - { + if let LogicalPlan::TableScan { .. 
} = **input { return make_logical_plan_node(Box::new(SegmentScan::new( self.store.clone(), predicate.clone(), @@ -159,7 +159,6 @@ impl SegmentScan { predicate, } } - } impl LogicalPlanNode for SegmentScan { @@ -236,7 +235,7 @@ impl LogicalPlanNode for SegmentScan { let time_range = (1590036110000000, 1590040770000000); let string_predicate = StringPredicate { col_name: "env".into(), - value: "prod01-eu-central-1".into() + value: "prod01-eu-central-1".into(), }; Ok(Arc::new(SegmentScanExec::new( @@ -245,10 +244,8 @@ impl LogicalPlanNode for SegmentScan { string_predicate, ))) } - } - #[derive(Debug, Clone)] struct StringPredicate { col_name: String, @@ -262,13 +259,16 @@ pub struct SegmentScanExec { // Specialized predicates to apply time_range: (i64, i64), - string_predicate: StringPredicate - + string_predicate: StringPredicate, } impl SegmentScanExec { fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { - SegmentScanExec { store , time_range, string_predicate } + SegmentScanExec { + store, + time_range, + string_predicate, + } } } @@ -279,7 +279,7 @@ impl ExecutionPlan for SegmentScanExec { fn partitions(&self) -> datafusion::error::Result>> { let store = self.store.clone(); - Ok(vec![Arc::new(SegmentPartition{ + Ok(vec![Arc::new(SegmentPartition { store, time_range: self.time_range, string_predicate: self.string_predicate.clone(), @@ -292,11 +292,12 @@ struct SegmentPartition { store: Arc, time_range: (i64, i64), string_predicate: StringPredicate, - } impl Partition for SegmentPartition { - fn execute(&self) -> datafusion::error::Result>> { + fn execute( + &self, + ) -> datafusion::error::Result>> { let combined_results: Vec> = vec![]; let segments = self.store.segments(); @@ -305,7 +306,6 @@ impl Partition for SegmentPartition { let col_name = &self.string_predicate.col_name; let scalar = column::Scalar::String(&self.string_predicate.value); - // Here let _columns = segments.read_filter_eq( self.time_range, @@ -322,12 +322,9 @@ impl Partition for SegmentPartition { // If we were implementing this for real, we would not convert // `columns` into RecordBatches and feed them back out - Ok(Arc::new(Mutex::new(RecordBatchIterator::new( self.store.schema().clone(), combined_results, )))) - - } } From 0132a600b551bbd5821827b7fa8d73d456c355fa Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 25 Aug 2020 11:37:59 +0100 Subject: [PATCH 32/73] feat: add schema wrapper for sort order --- delorean_mem_qe/src/segment.rs | 54 +++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 8dee09d5f4..5e8ff3549b 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use super::column; use super::column::Column; @@ -7,6 +7,45 @@ use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. 
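// NOTE(editor): the Schema wrapper introduced just below enforces two
// invariants on a sort order: no repeated columns, and no more sort columns
// than the schema has fields. A dependency-free analogue of those checks:
fn check_sort_order(num_fields: usize, sort_order: &[usize]) {
    let unique: std::collections::BTreeSet<_> = sort_order.iter().collect();
    assert_eq!(unique.len(), sort_order.len(), "sort order repeats a column");
    assert!(sort_order.len() <= num_fields, "sort order longer than schema");
}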
const THREADS: usize = 16; +#[derive(Debug)] +pub struct Schema { + _ref: SchemaRef, + col_sort_order: Vec, +} + +impl Schema { + pub fn new(schema: SchemaRef) -> Self { + Self { + _ref: schema, + col_sort_order: vec![], + } + } + + pub fn with_sort_order(schema: SchemaRef, sort_order: Vec) -> Self { + let set = sort_order.iter().collect::>(); + assert_eq!(set.len(), sort_order.len()); + assert!(sort_order.len() <= schema.fields().len()); + + Self { + _ref: schema, + col_sort_order: sort_order, + } + } + + pub fn sort_order(&self) -> &[usize] { + self.col_sort_order.as_slice() + } + + pub fn schema_ref(&self) -> SchemaRef { + self._ref.clone() + } + + pub fn cols(&self) -> usize { + let len = &self._ref.fields().len(); + *len + } +} + #[derive(Debug)] pub struct Segment { meta: SegmentMetaData, @@ -17,10 +56,11 @@ pub struct Segment { } impl Segment { - pub fn new(rows: usize, schema: SchemaRef) -> Self { + pub fn new(rows: usize, schema: Schema) -> Self { + let cols = schema.cols(); Self { meta: SegmentMetaData::new(rows, schema), - columns: vec![], + columns: Vec::with_capacity(cols), time_column_idx: 0, } } @@ -709,12 +749,12 @@ impl Segment { } /// Meta data for a segment. This data is mainly used to determine if a segment -/// may contain value for answering a query. +/// may contain a value that can answer a query. #[derive(Debug)] pub struct SegmentMetaData { size: usize, // TODO rows: usize, - schema: SchemaRef, + schema: Schema, column_names: Vec, time_range: (i64, i64), @@ -725,7 +765,7 @@ pub struct SegmentMetaData { } impl SegmentMetaData { - pub fn new(rows: usize, schema: SchemaRef) -> Self { + pub fn new(rows: usize, schema: Schema) -> Self { let mut meta = Self { size: 0, rows, @@ -739,7 +779,7 @@ impl SegmentMetaData { } pub fn schema(&self) -> SchemaRef { - self.schema.clone() + self.schema.schema_ref() } pub fn overlaps_time_range(&self, from: i64, to: i64) -> bool { From 577834c90f3734a4b0e2425c2cd1efa564057224 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 12:38:28 +0100 Subject: [PATCH 33/73] test: fix broken tests --- delorean_mem_qe/src/encoding.rs | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 6d9ce4e4f8..7b265facf1 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -622,7 +622,8 @@ mod test { assert_eq!(bm.to_vec(), vec![7, 9, 10, 11, 13]); let bm = col.row_ids_single_cmp_roaring(&20, std::cmp::Ordering::Equal); - assert_eq!(bm.to_vec(), vec![]); + let exp: Vec = Vec::new(); + assert_eq!(bm.to_vec(), exp); } #[test] @@ -713,25 +714,13 @@ mod test { assert_eq!(drle.value(7).unwrap(), "zoo"); assert_eq!(drle.value(8).unwrap(), "zoo"); - let row_ids = drle - .index_row_ids - .get(&Some("hello".to_string())) - .unwrap() - .to_vec(); + let row_ids = drle.index_row_ids.get(&0).unwrap().to_vec(); assert_eq!(row_ids, vec![0, 1, 3, 4, 5]); - let row_ids = drle - .index_row_ids - .get(&Some("world".to_string())) - .unwrap() - .to_vec(); + let row_ids = drle.index_row_ids.get(&1).unwrap().to_vec(); assert_eq!(row_ids, vec![2]); - let row_ids = drle - .index_row_ids - .get(&Some("zoo".to_string())) - .unwrap() - .to_vec(); + let row_ids = drle.index_row_ids.get(&2).unwrap().to_vec(); assert_eq!(row_ids, vec![6, 7, 8]); } From d1f9ca3acf55ca3096958e33a613f70199d43643 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 15:44:18 +0100 Subject: [PATCH 34/73] feat: add support for providing column 
sort --- delorean_mem_qe/src/bin/main.rs | 224 +++++++++++--------------------- delorean_mem_qe/src/segment.rs | 136 ++++++++++++++----- 2 files changed, 177 insertions(+), 183 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d371cf87e1..28e4e7576f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -1,11 +1,19 @@ -use std::{fs, fs::File, path::PathBuf, rc::Rc, sync::Arc}; +use std::{ + env, + ffi::OsStr, + fs, + fs::File, + path::{Path, PathBuf}, + rc::Rc, + sync::Arc, +}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; -use arrow::{array, array::Array, datatypes}; +use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; use delorean_mem_qe::column::Column; -use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Segment}; +use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Schema, Segment}; use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; use parquet::arrow::arrow_reader::ArrowReader; @@ -31,34 +39,21 @@ fn format_size(sz: usize) -> String { fn main() { env_logger::init(); + let args: Vec = env::args().collect(); - //let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); - - //let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000089174-000000004/http_api_requests_total.parquet"); - - // smaller file to test with - let path = PathBuf::from("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000068644-000000002/http_api_requests_total.parquet"); - - let r = File::open(&path).unwrap(); - let file_size = fs::metadata(&path).expect("read metadata").len(); - println!( - "Reading {} ({}) bytes of parquet from {:?}....", - format_size(file_size as usize), - file_size, - path - ); - - //let r = File::open("/Users/alamb/Software/query_testing/cloud2_sli_dashboard_query.ingested/data/000000000095062-000000006/http_api_requests_total.parquet").unwrap(); - let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); - let mut reader = - parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); - let batch_size = 60000; - let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); - - //let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + let path = &args[1]; + let mut sort_order = vec![]; + if let Some(arg) = args.get(2) { + sort_order = arg.split(',').collect::>(); + println!("sort is {:?}", sort_order); + }; let mut store = Store::default(); - build_store(record_batch_reader, &mut store).unwrap(); + match Path::new(path).extension().and_then(OsStr::to_str) { + Some("arrow") => build_arrow_store(path, &mut store, sort_order).unwrap(), + Some("parquet") => build_parquet_store(path, &mut store, sort_order).unwrap(), + _ => panic!("unsupported file type"), + } println!( "total segments {:?} with total size {} ({})", @@ -76,127 +71,49 @@ fn main() { time_group_single_with_pred(&store); time_group_by_multi_agg_count(&store); time_group_by_multi_agg_sorted_count(&store); - - // time_column_min_time(&store); - // time_column_max_time(&store); - // time_column_first(&store); - // let segments = store.segments(); - // let res = segments.last("host").unwrap(); - // println!("{:?}", res); - - // let segments = segments - // .filter_by_time(1590036110000000, 1590044410000000) - // .filter_by_predicate_eq("env", 
&column::Scalar::String("prod01-eu-central-1")); - // let res = segments.first( - // "env", - // &column::Scalar::String("prod01-eu-central-1"), - // 1590036110000000, - // ); - // println!("{:?}", res); - // let segments = segments.filter_by_time(1590036110000000, 1590044410000000); - // println!("{:?}", segments.last("host")); - // println!("{:?}", segments.segments().last().unwrap().row(14899)); - - // time_row_by_last_ts(&store); - - // let rows = segments - // .segments() - // .last() - // .unwrap() - // .filter_by_predicate_eq( - // Some((1590040770000000, 1590040790000000)), - // vec![ - // ("env", Some(&column::Scalar::String("prod01-us-west-2"))), - // ("method", Some(&column::Scalar::String("GET"))), - // ( - // "host", - // Some(&column::Scalar::String("queryd-v1-75bc6f7886-57pxd")), - // ), - // ], - // ) - // .unwrap(); - - // for row_id in rows.iter() { - // println!( - // "{:?} - {:?}", - // row_id, - // segments.segments().last().unwrap().row(row_id as usize) - // ); - // } - // println!("{:?}", rows.cardinality()); - - // time_row_by_preds(&store); - - // let segments = store.segments(); - // let columns = segments.read_filter_eq( - // (1590036110000000, 1590040770000000), - // &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], - // vec![ - // "env".to_string(), - // "method".to_string(), - // "host".to_string(), - // "counter".to_string(), - // "time".to_string(), - // ], - // ); - - // for (k, v) in columns { - // println!("COLUMN {:?}", k); - // // println!("ROWS ({:?}) {:?}", v.len(), 0); - // println!("ROWS ({}) {:?}", v, v.len()); - // } - - // loop { - // let now = std::time::Instant::now(); - // let segments = store.segments(); - // let groups = segments.read_group_eq( - // (0, 1590044410000000), - // &[], - // vec!["env".to_string(), "role".to_string()], - // vec![ - // ("counter".to_string(), Aggregate::Sum), - // // ("counter".to_string(), Aggregate::Count), - // ], - // ); - // println!("{:?} {:?}", groups, now.elapsed()); - // } - - // loop { - // let mut total_count = 0.0; - // let now = std::time::Instant::now(); - // for segment in segments.segments() { - // let (min, max) = segment.time_range(); - // let time_ids = segment - // .filter_by_predicates_eq((min, max), &vec![]) - // .unwrap(); - - // let group_ids = segment.group_by_column_ids("env").unwrap(); - // for (col_values, row_ids) in group_ids { - // // filter ids by time - // let mut result = row_ids.and(&time_ids); - // // let - // // println!( - // // "({:?}, {:?}) SUM OF COLUMN env={:?} is {:?} (count is {:?})", - // // min, - // // max, - // // col_values, - // // segment.sum_column(&"counter", &result), - // // result.cardinality(), - // // ); - // if let column::Scalar::Float(x) = - // segment.sum_column(&"counter", &mut result).unwrap() - // { - // total_count += x; - // } - // } - // } - // println!("Done ({:?}) in {:?}", total_count, now.elapsed()); - // } } -fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result<(), Error> { +fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { + let path = PathBuf::from(path); + let r = File::open(&path).unwrap(); + let file_size = fs::metadata(&path).expect("read metadata").len(); + println!( + "Reading {} ({}) bytes of Parquet from {:?}....", + format_size(file_size as usize), + file_size, + path + ); + + let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); + let mut reader = + 
parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let batch_size = 60000; + let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); + build_store(record_batch_reader, store, sort_order) +} + +fn build_arrow_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { + let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let file_size = fs::metadata(&path).expect("read metadata").len(); + println!( + "Reading {} ({}) bytes of Arrow from {:?}....", + format_size(file_size as usize), + file_size, + path + ); + + let reader = ipc::reader::StreamReader::try_new(r).unwrap(); + build_store(reader, store, sort_order) +} + +fn build_store( + mut reader: impl RecordBatchReader, + store: &mut Store, + sort_order: Vec<&str>, +) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); + // let mut i = 0; loop { let rb = reader.next_batch(); match rb { @@ -206,8 +123,15 @@ fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result< // i += 1; // continue; // } + let schema = Schema::with_sort_order( + rb.schema(), + sort_order.iter().map(|s| s.to_string()).collect(), + ); + total_rows_read += rb.num_rows(); - let segment = convert_record_batch(rb)?; + let mut segment = Segment::new(rb.num_rows(), schema); + convert_record_batch(rb, &mut segment)?; + store.add_segment(segment); } Ok(None) => { @@ -223,9 +147,7 @@ fn build_store(mut reader: impl RecordBatchReader, store: &mut Store) -> Result< } } -fn convert_record_batch(rb: RecordBatch) -> Result { - let mut segment = Segment::new(rb.num_rows(), rb.schema().clone()); - +fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Error> { println!( "Loading record batch: cols {:?} rows {:?}", rb.num_columns(), @@ -333,7 +255,7 @@ fn convert_record_batch(rb: RecordBatch) -> Result { ref d @ _ => panic!("unsupported datatype: {:?}", d), } } - Ok(segment) + Ok(()) } // @@ -582,7 +504,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 10; + let repeat = 10000; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 5e8ff3549b..d1a80ae389 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -10,7 +10,7 @@ const THREADS: usize = 16; #[derive(Debug)] pub struct Schema { _ref: SchemaRef, - col_sort_order: Vec, + col_sort_order: Vec, } impl Schema { @@ -21,7 +21,7 @@ impl Schema { } } - pub fn with_sort_order(schema: SchemaRef, sort_order: Vec) -> Self { + pub fn with_sort_order(schema: SchemaRef, sort_order: Vec) -> Self { let set = sort_order.iter().collect::>(); assert_eq!(set.len(), sort_order.len()); assert!(sort_order.len() <= schema.fields().len()); @@ -32,7 +32,7 @@ impl Schema { } } - pub fn sort_order(&self) -> &[usize] { + pub fn sort_order(&self) -> &[String] { self.col_sort_order.as_slice() } @@ -65,30 +65,6 @@ impl Segment { } } - pub fn num_rows(&self) -> usize { - self.meta.rows - } - - pub fn column_names(&self) -> &[String] { - &self.meta.column_names - } - - /// column returns the column with name - pub fn column(&self, name: &str) -> Option<&column::Column> { - if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { - return self.columns.get(*id); 
- } - None - } - - pub fn time_range(&self) -> (i64, i64) { - self.meta.time_range - } - - pub fn schema(&self) -> SchemaRef { - self.meta.schema() - } - pub fn add_column(&mut self, name: &str, c: column::Column) { assert_eq!( self.meta.rows, @@ -115,6 +91,30 @@ impl Segment { self.columns.push(c); } + pub fn num_rows(&self) -> usize { + self.meta.rows + } + + pub fn column_names(&self) -> &[String] { + &self.meta.column_names + } + + /// column returns the column with name + pub fn column(&self, name: &str) -> Option<&column::Column> { + if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { + return self.columns.get(*id); + } + None + } + + pub fn time_range(&self) -> (i64, i64) { + self.meta.time_range + } + + pub fn schema(&self) -> SchemaRef { + self.meta.schema() + } + // TODO - iterator.... /// Returns the size of the segment in bytes. pub fn size(&self) -> usize { @@ -193,6 +193,34 @@ impl Segment { None } + // Determines if a segment is already sorted by a group key. Only supports + // ascending ordering at the moment. If this function returns true then + // the columns being grouped on are naturally sorted and for basic + // aggregations should not need to be sorted or hashed. + fn group_key_sorted(&self, group_cols: &[String]) -> bool { + let sorted_by_cols = self.meta.schema.sort_order(); + if group_cols.len() > sorted_by_cols.len() { + // grouping by more columns than there are defined sorts. + return false; + } + + let mut covered = 0; + 'outer: for sc in sorted_by_cols { + // find col in group key - doesn't matter what location in group key + for gc in group_cols { + if sc == gc { + covered += 1; + continue 'outer; + } + } + + // didn't find this sorted column in group key. That's okay if there + // are no more columns being grouped + return covered == group_cols.len(); + } + true + } + pub fn aggregate_by_group_with_hash( &self, time_range: (i64, i64), @@ -419,8 +447,6 @@ impl Segment { group_column_encoded_values.push(None); } } - let group_col_sort_order = &(0..group_columns.len()).collect::>(); - // println!("grouped columns {:?}", group_column_encoded_values); // TODO(edd): we could do this with an iterator I expect. // @@ -460,9 +486,14 @@ impl Segment { } } - // now sort on the first grouping columns. Right now the order doesn't matter... let now = std::time::Instant::now(); - super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + if self.group_key_sorted(group_columns) { + log::debug!("segment already sorted by group key {:?}", group_columns); + } else { + // now sort on the first grouping columns. Right now the order doesn't matter... 
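// NOTE(editor): this branch is the slow path. When group_key_sorted()
// returns true above, the segment is already ordered by every grouping
// column, so the O(n log n) sort below is skipped entirely and groups can
// be consumed as contiguous runs of rows.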
+ let group_col_sort_order = &(0..group_columns.len()).collect::>(); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + } log::debug!("time checking sort {:?}", now.elapsed()); let mut group_itrs = all_columns @@ -1191,4 +1222,45 @@ pub enum GroupingStrategy { } #[cfg(test)] -mod test {} +mod test { + + use arrow::datatypes::*; + + #[test] + fn segment_group_key_sorted() { + let schema = super::Schema::with_sort_order( + arrow::datatypes::SchemaRef::new(Schema::new(vec![ + Field::new("env", DataType::Utf8, false), + Field::new("role", DataType::Utf8, false), + Field::new("path", DataType::Utf8, false), + Field::new("time", DataType::Int64, false), + ])), + vec![ + "env".to_string(), + "role".to_string(), + "path".to_string(), + "time".to_string(), + ], + ); + let s = super::Segment::new(0, schema); + + let cases = vec![ + (vec!["env"], true), + (vec!["role"], false), + (vec!["foo"], false), + (vec![], true), + (vec!["env", "role"], true), + (vec!["env", "role", "foo"], false), // group key contains non-sorted col + (vec!["env", "role", "path", "time"], true), + (vec!["env", "role", "path", "time", "foo"], false), // group key contains non-sorted col + (vec!["env", "path", "role"], true), // order of columns in group key does not matter + ]; + + for (group_key, expected) in cases { + assert_eq!( + s.group_key_sorted(&group_key.iter().map(|x| x.to_string()).collect::>()), + expected + ); + } + } +} From f588b9ff6171c62260081a95851bc10f4943ddf0 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 21:51:46 +0100 Subject: [PATCH 35/73] refactor: move AggregateType and push aggregates down --- delorean_mem_qe/src/bin/main.rs | 46 +++++++++++++++++++++------------ delorean_mem_qe/src/column.rs | 39 ++++++++++++++++++++++++++-- delorean_mem_qe/src/encoding.rs | 12 +++++++++ delorean_mem_qe/src/segment.rs | 36 ++++++++++++++------------ 4 files changed, 97 insertions(+), 36 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 28e4e7576f..a58ce879d2 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -12,8 +12,8 @@ use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::{array, array::Array, datatypes, ipc}; use delorean_mem_qe::column; -use delorean_mem_qe::column::Column; -use delorean_mem_qe::segment::{Aggregate, GroupingStrategy, Schema, Segment}; +use delorean_mem_qe::column::{AggregateType, Column}; +use delorean_mem_qe::segment::{GroupingStrategy, Schema, Segment}; use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; use parquet::arrow::arrow_reader::ArrowReader; @@ -63,13 +63,13 @@ fn main() { ); let store = Arc::new(store); - time_select_with_pred(&store); - time_datafusion_select_with_pred(store.clone()); - time_first_host(&store); - time_sum_range(&store); - time_count_range(&store); - time_group_single_with_pred(&store); - time_group_by_multi_agg_count(&store); + // time_select_with_pred(&store); + // time_datafusion_select_with_pred(store.clone()); + // time_first_host(&store); + // time_sum_range(&store); + // time_count_range(&store); + // time_group_single_with_pred(&store); + // time_group_by_multi_agg_count(&store); time_group_by_multi_agg_sorted_count(&store); } @@ -113,7 +113,7 @@ fn build_store( ) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); - // let mut i = 0; + let mut i = 0; loop { let rb = reader.next_batch(); match rb { @@ -441,7 +441,7 @@ fn time_group_single_with_pred(store: &Store) { 
(1588834080000000, 1590044410000000), &[], &"env".to_string(), - &vec![("counter".to_string(), Aggregate::Count)], + &vec![("counter".to_string(), AggregateType::Count)], ); track += results.len(); } @@ -457,6 +457,12 @@ fn time_group_single_with_pred(store: &Store) { ); } +// +// SELECT COUNT(counter) +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// GROUP BY "status", "method" +// fn time_group_by_multi_agg_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, @@ -477,7 +483,7 @@ fn time_group_by_multi_agg_count(store: &Store) { (1589000000000001, 1590044410000000), &[], vec!["status".to_string(), "method".to_string()], - vec![("counter".to_string(), Aggregate::Count)], + vec![("counter".to_string(), AggregateType::Count)], strat, ); @@ -495,16 +501,22 @@ fn time_group_by_multi_agg_count(store: &Store) { } } +// +// SELECT COUNT(counter) +// FROM measurement +// WHERE time >= "2020-05-21 04:41:50" AND time < "2020-05-21 05:59:30" +// GROUP BY "env", "role" +// fn time_group_by_multi_agg_sorted_count(store: &Store) { let strats = vec![ - GroupingStrategy::HashGroup, - GroupingStrategy::HashGroupConcurrent, + // GroupingStrategy::HashGroup, + // GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - GroupingStrategy::SortGroupConcurrent, + // GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { - let repeat = 10000; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -515,7 +527,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { (1589000000000001, 1590044410000000), &[], vec!["env".to_string(), "role".to_string()], - vec![("counter".to_string(), Aggregate::Count)], + vec![("counter".to_string(), AggregateType::Count)], strat, ); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index a2ac7e83c2..3ce8ee0357 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -103,6 +103,12 @@ pub enum Aggregate<'a> { Sum(Scalar<'a>), } +#[derive(Debug, Clone)] +pub enum AggregateType { + Count, + Sum, +} + impl<'a> Aggregate<'a> { pub fn update_with(&mut self, other: Scalar<'a>) { match self { @@ -319,8 +325,8 @@ impl Column { } } - /// Materialise all of the decoded values matching the provided logical - /// row ids. + /// Materialise the decoded value matching the provided logical + /// row id. 
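A note on the refactor this patch starts: `AggregateType` names the operation a caller requests, while the existing `Aggregate` carries an accumulated result. A minimal sketch of how the two halves interact, using a bare `f64` in place of the real `Scalar` values and lifetimes:

```rust
#[derive(Debug, Clone)]
enum AggregateType {
    Count,
    Sum,
}

#[derive(Debug)]
enum Aggregate {
    Count(u64),
    Sum(f64),
}

impl Aggregate {
    // seed an empty accumulator for a requested operation
    fn new(t: &AggregateType) -> Self {
        match t {
            AggregateType::Count => Aggregate::Count(0),
            AggregateType::Sum => Aggregate::Sum(0.0),
        }
    }

    // fold one row's value into the accumulator
    fn update(&mut self, v: f64) {
        match self {
            Aggregate::Count(c) => *c += 1,
            Aggregate::Sum(s) => *s += v,
        }
    }
}

fn main() {
    let mut agg = Aggregate::new(&AggregateType::Sum);
    for v in [1.5, 2.5, 3.0] {
        agg.update(v);
    }
    println!("{:?}", agg); // Sum(7.0)
}
```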
pub fn value(&self, row_id: usize) -> Option { match self { Column::String(c) => { @@ -726,6 +732,27 @@ impl Column { } } + pub fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + match self { + Column::String(_) => unimplemented!("not implemented"), + Column::Float(c) => match agg_type { + AggregateType::Count => { + Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id) as u64) + } + AggregateType::Sum => { + Aggregate::Sum(Scalar::Float(c.sum_by_id_range(from_row_id, to_row_id))) + } + }, + + Column::Integer(_) => unimplemented!("not implemented"), + } + } + pub fn group_by_ids(&self) -> &std::collections::BTreeMap { match self { Column::String(c) => c.data.group_row_ids(), @@ -977,6 +1004,14 @@ impl Float { pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { self.data.sum_by_ids(row_ids) } + + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> f64 { + self.data.sum_by_id_range(from_row_id, to_row_id) + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.data.count_by_id_range(from_row_id, to_row_id) + } } impl From<&[f64]> for Float { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 7b265facf1..8e9e282442 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -156,6 +156,18 @@ where bm } + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { + let mut res = T::default(); + for v in self.values[from_row_id..to_row_id].iter() { + res += *v; + } + res + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + to_row_id - from_row_id + } + // TODO(edd): make faster pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { let mut res = T::default(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index d1a80ae389..f3c6288df6 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use super::column; -use super::column::Column; +use super::column::{AggregateType, Column}; use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. @@ -226,7 +226,7 @@ impl Segment { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], - aggregates: &[(String, Aggregate)], + aggregates: &[(String, AggregateType)], ) -> BTreeMap, Vec<(String, Option)>> { // println!("working segment {:?}", time_range); // Build a hash table - essentially, scan columns for matching row ids, @@ -323,7 +323,7 @@ impl Segment { let mut hash_table: HashMap< Vec>, - Vec<(&String, &Aggregate, Option)>, + Vec<(&String, &AggregateType, Option)>, > = HashMap::with_capacity(30000); let mut aggregate_row: Vec<(&str, Option)> = @@ -361,7 +361,7 @@ impl Segment { // a place-holder for each aggregate being executed. 
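The loop that follows leans on the map's `entry` API so that each distinct group key initialises its aggregate place-holders exactly once. A dependency-free sketch of that shape, with a hypothetical `hash_group` helper, encoded group keys as `u32`s, and a fixed count-and-sum pair standing in for the per-query aggregate vector:

```rust
use std::collections::HashMap;

fn hash_group(rows: &[(Vec<u32>, f64)]) -> HashMap<Vec<u32>, (u64, f64)> {
    let mut table: HashMap<Vec<u32>, (u64, f64)> = HashMap::new();
    for (group_key, value) in rows {
        // lazily create the place-holder aggregates for a new group key
        let (count, sum) = table.entry(group_key.clone()).or_insert((0, 0.0));
        *count += 1;
        *sum += *value;
    }
    table
}

fn main() {
    let rows = vec![(vec![0, 1], 1.0), (vec![0, 1], 2.0), (vec![2, 1], 5.0)];
    let table = hash_group(&rows);
    assert_eq!(table[&vec![0, 1]], (2, 3.0));
    assert_eq!(table[&vec![2, 1]], (1, 5.0));
}
```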
let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { // TODO COULD BE MAP/COLLECT - let mut agg_results: Vec<(&String, &Aggregate, Option)> = + let mut agg_results: Vec<(&String, &AggregateType, Option)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option @@ -396,8 +396,10 @@ impl Segment { }, None => { *cum_agg_value = match agg_type { - Aggregate::Count => Some(column::Aggregate::Count(0)), - Aggregate::Sum => Some(column::Aggregate::Sum(row_value.clone())), + AggregateType::Count => Some(column::Aggregate::Count(0)), + AggregateType::Sum => { + Some(column::Aggregate::Sum(row_value.clone())) + } } } } @@ -414,7 +416,7 @@ impl Segment { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], - aggregates: &[(String, Aggregate)], + aggregates: &[(String, AggregateType)], ) -> BTreeMap, Vec<(String, column::Aggregate)>> { // filter on predicates and time let filtered_row_ids: croaring::Bitmap; @@ -536,8 +538,8 @@ impl Segment { .zip(last_agg_row.iter()) .map(|((col_name, agg_type), curr_agg)| { let agg = match agg_type { - Aggregate::Count => column::Aggregate::Count(1), - Aggregate::Sum => column::Aggregate::Sum(curr_agg.clone()), + AggregateType::Count => column::Aggregate::Count(1), + AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), }; (col_name.clone(), agg) }) @@ -717,8 +719,8 @@ impl Segment { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_column: &String, - aggregates: &Vec<(String, Aggregate)>, - ) -> BTreeMap> { + aggregates: &Vec<(String, column::AggregateType)>, + ) -> BTreeMap> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -734,12 +736,12 @@ impl Segment { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { // First calculate all of the aggregates for this grouped value - let mut aggs: Vec<((String, Aggregate), column::Aggregate)> = + let mut aggs: Vec<((String, AggregateType), column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg) in aggregates { match &agg { - Aggregate::Sum => { + AggregateType::Sum => { aggs.push(( (col_name.to_string(), agg.clone()), column::Aggregate::Sum( @@ -747,7 +749,7 @@ impl Segment { ), // assuming no non-null group keys )); } - Aggregate::Count => { + AggregateType::Count => { aggs.push(( (col_name.to_string(), agg.clone()), column::Aggregate::Count( @@ -898,7 +900,7 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, - aggregates: Vec<(String, Aggregate)>, + aggregates: Vec<(String, AggregateType)>, strategy: &GroupingStrategy, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; @@ -957,7 +959,7 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, - aggregates: Vec<(String, Aggregate)>, + aggregates: Vec<(String, AggregateType)>, concurrent: bool, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { if concurrent { @@ -1034,7 +1036,7 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, - aggregates: Vec<(String, Aggregate)>, + aggregates: Vec<(String, AggregateType)>, concurrent: bool, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { if concurrent { From 
ab866073e30f7a9ecf046f7c06b336fb61576b48 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 26 Aug 2020 22:44:45 +0100 Subject: [PATCH 36/73] perf: faster group by with sorted cols --- delorean_mem_qe/src/bin/main.rs | 6 +- delorean_mem_qe/src/segment.rs | 351 +++++++++++++++++++++++++++++++- 2 files changed, 349 insertions(+), 8 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index a58ce879d2..f89d7af645 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -509,10 +509,10 @@ fn time_group_by_multi_agg_count(store: &Store) { // fn time_group_by_multi_agg_sorted_count(store: &Store) { let strats = vec![ - // GroupingStrategy::HashGroup, - // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - // GroupingStrategy::SortGroupConcurrent, + GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f3c6288df6..2e03f9d1d0 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -410,13 +410,37 @@ impl Segment { log::debug!("{:?}", hash_table); BTreeMap::new() } - pub fn aggregate_by_group_with_sort( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + if self.group_key_sorted(group_columns) { + log::info!("group key is already sorted {:?}", group_columns); + self.aggregate_by_group_with_sort_sorted( + time_range, + predicates, + group_columns, + aggregates, + ) + } else { + self.aggregate_by_group_with_sort_unsorted( + time_range, + predicates, + group_columns, + aggregates, + ) + } + } + + fn aggregate_by_group_with_sort_unsorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], ) -> BTreeMap, Vec<(String, column::Aggregate)>> { // filter on predicates and time let filtered_row_ids: croaring::Bitmap; @@ -490,7 +514,7 @@ impl Segment { let now = std::time::Instant::now(); if self.group_key_sorted(group_columns) { - log::debug!("segment already sorted by group key {:?}", group_columns); + panic!("This shouldn't be called!!!"); } else { // now sort on the first grouping columns. Right now the order doesn't matter... let group_col_sort_order = &(0..group_columns.len()).collect::>(); @@ -595,6 +619,281 @@ impl Segment { BTreeMap::new() } + // this method assumes that the segment's columns are sorted such that a + // sort of columns is not required. + fn aggregate_by_group_with_sort_sorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. 
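One mechanical detail worth calling out before the materialisation below: the bitmap yields `u32` row ids, which are widened to `usize` so they can index the column vectors. A dependency-free sketch of that gather step (the real code reads the ids from a `croaring::Bitmap`):

```rust
fn widen(row_ids: &[u32]) -> Vec<usize> {
    row_ids.iter().map(|&v| v as usize).collect()
}

fn main() {
    // rows that survived the time-range and predicate filter
    let matching: Vec<u32> = vec![0, 3, 4];
    let column = ["a", "b", "c", "d", "e"];

    let ids = widen(&matching);
    let materialised: Vec<&str> = ids.iter().map(|&i| column[i]).collect();
    assert_eq!(materialised, vec!["a", "d", "e"]);
}
```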
+ let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values = column.encoded_values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + + group_column_encoded_values.push(encoded_values); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + let mut new_agg_cols = Vec::with_capacity(aggregates.len()); + for (column_name, agg_type) in aggregates { + new_agg_cols.push((column_name, agg_type, self.column(&column_name))); + } + + let mut group_itrs = group_column_encoded_values + .iter() + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } + }) + .collect::>(); + + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect::>(); + + let mut curr_group_row = last_group_row.clone(); + + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + + let mut group_key_start_row_id = 0; + let mut group_size = 0; + + while processed_rows < *total_rows { + // update next group key. + let mut group_key_changed = false; + for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { + let next_v = itr.next().unwrap(); + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. + if group_key_changed { + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset counters tracking group key row range + group_key_start_row_id = processed_rows as usize; // TODO(edd) - could be an off-by-one? + group_size = 0; + } + + group_size += 1; + processed_rows += 1; + } + + // Emit final row + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + log::info!("({:?} rows processed) {:?}", processed_rows, results); + // results + BTreeMap::new() + } + + // this method assumes that the segment's columns are sorted such that a + // sort of columns is not required. 
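Before the windowed variant, it is worth restating the streaming idea in miniature: when rows arrive sorted by the group key, each group occupies one contiguous run, so the scan only has to remember where the current run began and flush an aggregate when the key changes. A self-contained sketch with a single `u32` key column and a sum:

```rust
fn stream_group_sum(keys: &[u32], values: &[f64]) -> Vec<(u32, f64)> {
    let mut results = Vec::new();
    if keys.is_empty() {
        return results;
    }
    let mut run_start = 0;
    for i in 1..keys.len() {
        if keys[i] != keys[run_start] {
            // key changed: aggregate the completed run [run_start, i)
            results.push((keys[run_start], values[run_start..i].iter().sum()));
            run_start = i;
        }
    }
    // emit the final run
    results.push((keys[run_start], values[run_start..].iter().sum()));
    results
}

fn main() {
    let keys = [1, 1, 1, 4, 4, 9];
    let vals = [1.0, 2.0, 3.0, 10.0, 10.0, 7.0];
    assert_eq!(
        stream_group_sum(&keys, &vals),
        vec![(1, 6.0), (4, 20.0), (9, 7.0)]
    );
}
```

This is the O(n) single pass the sorted branch buys; the hash path pays a map lookup per row instead.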
+ fn window_aggregate_with_sort_sorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + window: i64, + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values = column.encoded_values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + + group_column_encoded_values.push(encoded_values); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + let mut new_agg_cols = Vec::with_capacity(aggregates.len()); + for (column_name, agg_type) in aggregates { + new_agg_cols.push((column_name, agg_type, self.column(&column_name))); + } + + let mut group_itrs = group_column_encoded_values + .iter() + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } + }) + .collect::>(); + + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect::>(); + + let mut curr_group_row = last_group_row.clone(); + + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + + let mut group_key_start_row_id = 0; + let mut group_size = 0; + + while processed_rows < *total_rows { + // update next group key. + let mut group_key_changed = false; + for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { + let next_v = itr.next().unwrap(); + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. + if group_key_changed { + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset counters tracking group key row range + group_key_start_row_id = processed_rows as usize; // TODO(edd) - could be an off-by-one? 
+ group_size = 0; + } + + group_size += 1; + processed_rows += 1; + } + + // Emit final row + let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); + for (name, agg_type, col) in &new_agg_cols { + if let Some(c) = col { + let agg_result = c.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + group_key_aggregates.push((name, agg_result)); + } else { + panic!("figure this out"); + } + } + + let key = last_group_row.clone(); + results.insert(key, group_key_aggregates); + + log::info!("({:?} rows processed) {:?}", processed_rows, results); + // results + BTreeMap::new() + } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -1058,7 +1357,7 @@ impl<'a> Segments<'a> { &aggregates, ); log::info!( - "processed segment {:?} using multi-threaded hash-grouping in {:?}", + "processed segment {:?} using multi-threaded sort in {:?}", segment.time_range(), now.elapsed() ) @@ -1078,7 +1377,7 @@ impl<'a> Segments<'a> { &aggregates_arc.clone(), ); log::info!( - "processed segment {:?} using multi-threaded hash-grouping in {:?}", + "processed segment {:?} using multi-threaded sort in {:?}", segment.time_range(), now.elapsed() ) @@ -1099,7 +1398,7 @@ impl<'a> Segments<'a> { &aggregates, ); log::info!( - "processed segment {:?} using single-threaded hash-grouping in {:?}", + "processed segment {:?} using single-threaded sort in {:?}", segment.time_range(), now.elapsed() ) @@ -1129,6 +1428,48 @@ impl<'a> Segments<'a> { min_min } + pub fn window_agg_eq( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: Vec, + aggregates: Vec<(String, AggregateType)>, + strategy: &GroupingStrategy, + window: i64, + ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + match strategy { + GroupingStrategy::HashGroup => { + panic!("not yet"); + } + GroupingStrategy::HashGroupConcurrent => { + panic!("not yet"); + } + GroupingStrategy::SortGroup => { + return self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + false, + ) + } + GroupingStrategy::SortGroupConcurrent => { + panic!("not yet"); + } + } + + // TODO(edd): merge results - not expensive really... + // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = + // BTreeMap::new(); + + // cum_results + } + /// Returns the maximum value for a column in a set of segments. 
pub fn column_max(&self, column_name: &str) -> Option { if self.segments.is_empty() { From 4a153f5f7d5dea62e17bc84708a7d65e39960f95 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 13:59:24 +0100 Subject: [PATCH 37/73] feat: generalise windowing to sorted/unsorted groups --- delorean_mem_qe/src/bin/main.rs | 31 +++- delorean_mem_qe/src/encoding.rs | 3 +- delorean_mem_qe/src/segment.rs | 305 ++++++++++++++++++++++++++++---- 3 files changed, 305 insertions(+), 34 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index f89d7af645..e8bfc3dbd7 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -70,7 +70,8 @@ fn main() { // time_count_range(&store); // time_group_single_with_pred(&store); // time_group_by_multi_agg_count(&store); - time_group_by_multi_agg_sorted_count(&store); + // time_group_by_multi_agg_sorted_count(&store); + time_window_agg_sorted_count(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -544,3 +545,31 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ); } } + +fn time_window_agg_sorted_count(store: &Store) { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.window_agg_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["env".to_string(), "role".to_string()], + vec![("counter".to_string(), AggregateType::Count)], + 60000000 * 10, // 10 minutes + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_window_agg_sorted_count ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + total_max + ); +} diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 8e9e282442..d6a865a5f1 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -51,13 +51,14 @@ where out.push(self.values[chunks[2]]); out.push(self.values[chunks[1]]); out.push(self.values[chunks[0]]); - // out.push(self.values[row_id]); } let rem = row_ids.len() % 4; for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { out.push(self.values[i]); } + + assert_eq!(out.len(), row_ids.len()); out } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 2e03f9d1d0..e99c7e0181 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -410,6 +410,7 @@ impl Segment { log::debug!("{:?}", hash_table); BTreeMap::new() } + pub fn aggregate_by_group_with_sort( &self, time_range: (i64, i64), @@ -756,6 +757,35 @@ impl Segment { BTreeMap::new() } + pub fn window_aggregate_with_sort( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + window: i64, + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + if self.group_key_sorted(group_columns) { + log::info!("group key is already sorted {:?}", group_columns); + self.window_aggregate_with_sort_sorted( + time_range, + predicates, + group_columns, + aggregates, + window, + ) + } else { + log::info!("group key needs sorting {:?}", group_columns); + self.window_aggregate_with_sort_unsorted( + time_range, + predicates, + group_columns, + aggregates, + window, + ) + } + } + // this method assumes that the segment's columns are sorted such that a // sort of columns 
is not required. fn window_aggregate_with_sort_sorted( @@ -809,16 +839,24 @@ impl Segment { if let column::Vector::Integer(v) = vector { v.iter() } else { - panic!("don't support grouping on non-encoded values"); + panic!("don't support grouping on non-encoded values or time"); } }) .collect::>(); // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. + let group_itrs_len = &group_itrs.len(); let mut last_group_row = group_itrs .iter_mut() - .map(|itr| itr.next().unwrap()) + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) .collect::>(); let mut curr_group_row = last_group_row.clone(); @@ -832,9 +870,18 @@ impl Segment { while processed_rows < *total_rows { // update next group key. let mut group_key_changed = false; - for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { - let next_v = itr.next().unwrap(); - if curr_v != &next_v { + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_itrs_len - 1 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; + if *curr_v != next_v { group_key_changed = true; } *curr_v = next_v; @@ -886,7 +933,7 @@ impl Segment { } } - let key = last_group_row.clone(); + let key = last_group_row; results.insert(key, group_key_aggregates); log::info!("({:?} rows processed) {:?}", processed_rows, results); @@ -894,6 +941,206 @@ impl Segment { BTreeMap::new() } + fn window_aggregate_with_sort_unsorted( + &self, + time_range: (i64, i64), + predicates: &[(&str, Option<&column::Scalar>)], + group_columns: &[String], + aggregates: &[(String, AggregateType)], + window: i64, + ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + // filter on predicates and time + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { + filtered_row_ids = row_ids; + } else { + return BTreeMap::new(); + } + let total_rows = &filtered_row_ids.cardinality(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + + // materialise all encoded values for the matching rows in the columns + // we are grouping on and store each group as an iterator. + let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); + for group_column in group_columns { + if let Some(column) = self.column(&group_column) { + let encoded_values = column.encoded_values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + encoded_values.len() + ); + group_column_encoded_values.push(Some(encoded_values)); + } else { + group_column_encoded_values.push(None); + } + } + + // TODO(edd): we could do this with an iterator I expect. + // + // materialise all decoded values for the rows in the columns we are + // aggregating on. 
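The `/ window * window` expression applied to the time column above is the entire windowing mechanism: integer division truncates a timestamp down to the start of its bucket, so the bucketed time behaves like one more encoded group column. A tiny check of that arithmetic (the benchmarks in main.rs pass microsecond timestamps and a 10 minute window):

```rust
fn window_start(ts: i64, window: i64) -> i64 {
    // note: `/` truncates toward zero, so this is only a floor for
    // non-negative timestamps
    ts / window * window
}

fn main() {
    let window = 60_000_000 * 10; // 10 minutes in microseconds
    assert_eq!(window_start(1_589_000_000_000_001, window), 1_588_999_800_000_000);
    // two timestamps inside the same 10 minute window share a bucket
    assert_eq!(
        window_start(1_589_000_000_000_001, window),
        window_start(1_589_000_399_999_999, window)
    );
}
```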
+ let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); + for (column_name, _) in aggregates { + if let Some(column) = self.column(&column_name) { + let decoded_values = column.values(&filtered_row_ids_vec); + assert_eq!( + filtered_row_ids.cardinality() as usize, + decoded_values.len() + ); + aggregate_column_decoded_values.push((column_name, Some(decoded_values))); + } else { + aggregate_column_decoded_values.push((column_name, None)); + } + } + + let mut all_columns = Vec::with_capacity( + group_column_encoded_values.len() + aggregate_column_decoded_values.len(), + ); + + for gc in group_column_encoded_values { + if let Some(p) = gc { + all_columns.push(p); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + for ac in aggregate_column_decoded_values { + if let (_, Some(p)) = ac { + all_columns.push(p); + } else { + panic!("need to handle no results for filtering/grouping..."); + } + } + + let now = std::time::Instant::now(); + if self.group_key_sorted(&group_columns) { + panic!("This shouldn't be called!!!"); + } else { + // now sort on the first grouping columns. Right now the order doesn't matter... + let group_col_sort_order = &(0..group_columns.len()).collect::>(); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + } + log::debug!("time checking sort {:?}", now.elapsed()); + + let mut group_itrs = all_columns + .iter() + .take(group_columns.len()) // only use grouping columns + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } + }) + .collect::>(); + + let mut aggregate_itrs = all_columns + .iter() + .skip(group_columns.len()) // only use grouping columns + .map(|v| column::VectorIterator::new(v)) + .collect::>(); + + // this tracks the last seen group key row. When it changes we can emit + // the grouped aggregates. + let mut last_group_row = group_itrs + .iter_mut() + .enumerate() + .map(|(i, itr)| { + if i == group_columns.len() - 1 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) + .collect::>(); + + let mut curr_group_row = last_group_row.clone(); + + // this tracks the last row for each column we are aggregating. + let last_agg_row: Vec = aggregate_itrs + .iter_mut() + .map(|itr| itr.next().unwrap()) + .collect(); + + // this keeps the current cumulative aggregates for the columns we + // are aggregating. + let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates + .iter() + .zip(last_agg_row.iter()) + .map(|((col_name, agg_type), curr_agg)| { + let agg = match agg_type { + AggregateType::Count => column::Aggregate::Count(1), + AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), + }; + (col_name.clone(), agg) + }) + .collect(); + + let mut results = BTreeMap::new(); + let mut processed_rows = 1; + while processed_rows < *total_rows { + // update next group key. + let mut group_key_changed = false; + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_columns.len() - 1 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; + if curr_v != &next_v { + group_key_changed = true; + } + *curr_v = next_v; + } + + // group key changed - emit group row and aggregates. 
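The change detection that drives the emit below runs one iterator per group column in lock step and compares the freshly assembled key row against the previous one. A stripped-down sketch, with plain `i64` vectors in place of the encoded column iterators:

```rust
fn group_changed(last: &[i64], curr: &[i64]) -> bool {
    last.iter().zip(curr.iter()).any(|(a, b)| a != b)
}

fn main() {
    // two group columns, three rows: (1,5), (1,5), (2,5)
    let cols: Vec<Vec<i64>> = vec![vec![1, 1, 2], vec![5, 5, 5]];
    let mut iters: Vec<_> = cols.iter().map(|c| c.iter()).collect();

    let mut last: Vec<i64> = iters.iter_mut().map(|it| *it.next().unwrap()).collect();
    for _ in 1..3 {
        let curr: Vec<i64> = iters.iter_mut().map(|it| *it.next().unwrap()).collect();
        if group_changed(&last, &curr) {
            // fires once, at the (2, 5) row
            println!("group boundary before {:?}", curr);
        }
        last = curr;
    }
}
```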
+ if group_key_changed { + let key = last_group_row.clone(); + results.insert(key, cum_aggregates.clone()); + + // update group key + last_group_row = curr_group_row.clone(); + + // reset cumulative aggregates + for (_, agg) in cum_aggregates.iter_mut() { + match agg { + column::Aggregate::Count(c) => { + *c = 0; + } + column::Aggregate::Sum(s) => s.reset(), + } + } + } + + // update aggregates + for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { + let (_, curr_agg) = bind.0; + let next_value = bind.1.next().unwrap(); + curr_agg.update_with(next_value); + } + + processed_rows += 1; + } + + // Emit final row + results.insert(last_group_row, cum_aggregates); + + log::info!("({:?} rows processed) {:?}", processed_rows, results); + // results + BTreeMap::new() + } + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); @@ -1434,7 +1681,6 @@ impl<'a> Segments<'a> { predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, - strategy: &GroupingStrategy, window: i64, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; @@ -1442,32 +1688,26 @@ impl<'a> Segments<'a> { panic!("max <= min"); } - match strategy { - GroupingStrategy::HashGroup => { - panic!("not yet"); - } - GroupingStrategy::HashGroupConcurrent => { - panic!("not yet"); - } - GroupingStrategy::SortGroup => { - return self.read_group_eq_sort( - time_range, - predicates, - group_columns, - aggregates, - false, - ) - } - GroupingStrategy::SortGroupConcurrent => { - panic!("not yet"); - } + // add time column to the group key + let mut group_columns = group_columns.clone(); + group_columns.push("time".to_string()); + + for segment in &self.segments { + let now = std::time::Instant::now(); + segment.window_aggregate_with_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using windowed single-threaded sort in {:?}", + segment.time_range(), + now.elapsed() + ) } - - // TODO(edd): merge results - not expensive really... - // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - // BTreeMap::new(); - - // cum_results + BTreeMap::new() } /// Returns the maximum value for a column in a set of segments. 
@@ -1594,6 +1834,7 @@ mod test { (vec![], true), (vec!["env", "role"], true), (vec!["env", "role", "foo"], false), // group key contains non-sorted col + (vec!["env", "role", "time"], false), // time may be out of order due to path column (vec!["env", "role", "path", "time"], true), (vec!["env", "role", "path", "time", "foo"], false), // group key contains non-sorted col (vec!["env", "path", "role"], true), // order of columns in group key does not matter From ee46c194c85ce4d773107e39ffc864177f90279f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 14:57:27 +0100 Subject: [PATCH 38/73] refactor: integrate windowing into grouping --- delorean_mem_qe/src/bin/main.rs | 65 +++++++++------ delorean_mem_qe/src/segment.rs | 142 +++++++++++++++++++++----------- 2 files changed, 134 insertions(+), 73 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e8bfc3dbd7..e3c631642f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -120,10 +120,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - // if i < 363 { - // i += 1; - // continue; - // } + if i < 363 { + i += 1; + continue; + } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -485,6 +485,7 @@ fn time_group_by_multi_agg_count(store: &Store) { &[], vec!["status".to_string(), "method".to_string()], vec![("counter".to_string(), AggregateType::Count)], + 0, strat, ); @@ -529,6 +530,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { &[], vec!["env".to_string(), "role".to_string()], vec![("counter".to_string(), AggregateType::Count)], + 0, strat, ); @@ -547,29 +549,40 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { } fn time_window_agg_sorted_count(store: &Store) { - let repeat = 10; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; - let segments = store.segments(); - for _ in 0..repeat { - let now = std::time::Instant::now(); + let strats = vec![ + // GroupingStrategy::HashGroup, + // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::SortGroup, + // GroupingStrategy::SortGroupConcurrent, + ]; - let groups = segments.window_agg_eq( - (1589000000000001, 1590044410000000), - &[], - vec!["env".to_string(), "role".to_string()], - vec![("counter".to_string(), AggregateType::Count)], - 60000000 * 10, // 10 minutes + for strat in &strats { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut total_max = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let groups = segments.read_group_eq( + (1589000000000001, 1590044410000000), + &[], + vec!["env".to_string(), "role".to_string()], + vec![("counter".to_string(), AggregateType::Count)], + 60000000 * 10, // 10 minutes, + strat, + ); + + total_time += now.elapsed(); + total_max += groups.len(); + } + println!( + "time_window_agg_sorted_count {:?} ran {:?} in {:?} {:?} / call {:?}", + strat, + repeat, + total_time, + total_time / repeat, + total_max ); - - total_time += now.elapsed(); - total_max += groups.len(); } - println!( - "time_window_agg_sorted_count ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - total_max - ); } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index e99c7e0181..bac015537e 100644 --- a/delorean_mem_qe/src/segment.rs 
+++ b/delorean_mem_qe/src/segment.rs @@ -417,6 +417,7 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, column::Aggregate)>> { if self.group_key_sorted(group_columns) { log::info!("group key is already sorted {:?}", group_columns); @@ -425,13 +426,16 @@ impl Segment { predicates, group_columns, aggregates, + window, ) } else { + log::info!("group key needs sorting {:?}", group_columns); self.aggregate_by_group_with_sort_unsorted( time_range, predicates, group_columns, aggregates, + window, ) } } @@ -442,7 +446,17 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + log::debug!("aggregate_by_group_with_sort_unsorted called"); + + if window > 0 { + // last column on group key should be time. + assert_eq!(group_columns[group_columns.len() - 1], "time"); + } else { + assert_ne!(group_columns[group_columns.len() - 1], "time"); + } + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -543,9 +557,17 @@ impl Segment { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. + let group_itrs_len = &group_itrs.len(); let mut last_group_row = group_itrs .iter_mut() - .map(|itr| itr.next().unwrap()) + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) .collect::>(); let mut curr_group_row = last_group_row.clone(); @@ -575,8 +597,17 @@ impl Segment { while processed_rows < *total_rows { // update next group key. let mut group_key_changed = false; - for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { - let next_v = itr.next().unwrap(); + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; if curr_v != &next_v { group_key_changed = true; } @@ -615,7 +646,7 @@ impl Segment { // Emit final row results.insert(last_group_row, cum_aggregates); - log::debug!("{:?}", results); + log::info!("({:?} rows processed) {:?}", processed_rows, results); // results BTreeMap::new() } @@ -628,7 +659,17 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + log::debug!("aggregate_by_group_with_sort_sorted called"); + + if window > 0 { + // last column on group key should be time. + assert_eq!(group_columns[group_columns.len() - 1], "time"); + } else { + assert_ne!(group_columns[group_columns.len() - 1], "time"); + } + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -679,9 +720,17 @@ impl Segment { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. 
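Both the sorted and unsorted paths now share a contract that is asserted up front: a windowed call (`window > 0`) must arrive with "time" appended as the final group column, and a non-windowed call must not group on "time" directly. A minimal restatement of that check, using `last()` so an empty group key is handled rather than panicking on the index:

```rust
fn check_group_key(group_columns: &[String], window: i64) {
    if window > 0 {
        // the window function is applied to the trailing time column
        assert_eq!(group_columns.last().map(String::as_str), Some("time"));
    } else {
        assert_ne!(group_columns.last().map(String::as_str), Some("time"));
    }
}

fn main() {
    let windowed = vec!["env".to_string(), "role".to_string(), "time".to_string()];
    check_group_key(&windowed, 60_000_000 * 10);

    let plain = vec!["env".to_string(), "role".to_string()];
    check_group_key(&plain, 0);
}
```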
+ let group_itrs_len = &group_itrs.len(); let mut last_group_row = group_itrs .iter_mut() - .map(|itr| itr.next().unwrap()) + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) .collect::>(); let mut curr_group_row = last_group_row.clone(); @@ -695,8 +744,17 @@ impl Segment { while processed_rows < *total_rows { // update next group key. let mut group_key_changed = false; - for (curr_v, itr) in curr_group_row.iter_mut().zip(group_itrs.iter_mut()) { - let next_v = itr.next().unwrap(); + for (i, (curr_v, itr)) in curr_group_row + .iter_mut() + .zip(group_itrs.iter_mut()) + .enumerate() + { + let next_v = if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + itr.next().unwrap() / window * window + } else { + *itr.next().unwrap() + }; if curr_v != &next_v { group_key_changed = true; } @@ -749,7 +807,7 @@ impl Segment { } } - let key = last_group_row.clone(); + let key = last_group_row; results.insert(key, group_key_aggregates); log::info!("({:?} rows processed) {:?}", processed_rows, results); @@ -1447,6 +1505,7 @@ impl<'a> Segments<'a> { predicates: &[(&str, Option<&column::Scalar>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, + window: i64, strategy: &GroupingStrategy, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { let (min, max) = time_range; @@ -1456,48 +1515,28 @@ impl<'a> Segments<'a> { match strategy { GroupingStrategy::HashGroup => { - return self.read_group_eq_hash( - time_range, - predicates, - group_columns, - aggregates, - false, - ) + self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, false) } GroupingStrategy::HashGroupConcurrent => { - return self.read_group_eq_hash( - time_range, - predicates, - group_columns, - aggregates, - true, - ) - } - GroupingStrategy::SortGroup => { - return self.read_group_eq_sort( - time_range, - predicates, - group_columns, - aggregates, - false, - ) - } - GroupingStrategy::SortGroupConcurrent => { - return self.read_group_eq_sort( - time_range, - predicates, - group_columns, - aggregates, - true, - ) + self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, true) } + GroupingStrategy::SortGroup => self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + window, + false, + ), + GroupingStrategy::SortGroupConcurrent => self.read_group_eq_sort( + time_range, + predicates, + group_columns, + aggregates, + window, + true, + ), } - - // TODO(edd): merge results - not expensive really... 
- // let mut cum_results: BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> = - // BTreeMap::new(); - - // cum_results } fn read_group_eq_hash( @@ -1581,10 +1620,16 @@ impl<'a> Segments<'a> { &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, + mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, + window: i64, concurrent: bool, ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + if window > 0 { + // add time column to the group key + group_columns.push("time".to_string()); + } + if concurrent { let group_columns_arc = std::sync::Arc::new(group_columns); let aggregates_arc = std::sync::Arc::new(aggregates); @@ -1602,6 +1647,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using multi-threaded sort in {:?}", @@ -1622,6 +1668,7 @@ impl<'a> Segments<'a> { predicates, &group_columns_arc.clone(), &aggregates_arc.clone(), + window, ); log::info!( "processed segment {:?} using multi-threaded sort in {:?}", @@ -1643,6 +1690,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using single-threaded sort in {:?}", From 6e8e11c09cb4f3c9a10b558d580486ca9b610768 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 15:09:51 +0100 Subject: [PATCH 39/73] refactor: move group methods to sort/stream --- delorean_mem_qe/src/bin/main.rs | 2 +- delorean_mem_qe/src/segment.rs | 201 +++++++++++++++----------------- 2 files changed, 98 insertions(+), 105 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e3c631642f..9fcc3590cc 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -567,7 +567,7 @@ fn time_window_agg_sorted_count(store: &Store) { let groups = segments.read_group_eq( (1589000000000001, 1590044410000000), &[], - vec!["env".to_string(), "role".to_string()], + vec!["env".to_string(), "role".to_string(), "path".to_string()], vec![("counter".to_string(), AggregateType::Count)], 60000000 * 10, // 10 minutes, strat, diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index bac015537e..fa298fe74f 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -411,36 +411,7 @@ impl Segment { BTreeMap::new() } - pub fn aggregate_by_group_with_sort( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - if self.group_key_sorted(group_columns) { - log::info!("group key is already sorted {:?}", group_columns); - self.aggregate_by_group_with_sort_sorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } else { - log::info!("group key needs sorting {:?}", group_columns); - self.aggregate_by_group_with_sort_unsorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } - } - - fn aggregate_by_group_with_sort_unsorted( + pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], @@ -651,9 +622,13 @@ impl Segment { BTreeMap::new() } - // this method assumes that the segment's columns are sorted such that a - // sort of columns is not required. - fn aggregate_by_group_with_sort_sorted( + // Executes aggregates grouping by group_columns. 
If window is positive then + // a windowed aggregate result set is produced. + // + // `aggregate_by_group_using_stream` assumes that all columns being grouped + // on are part of the overall segment sort, therefore it does no sorting or + // hashing, and just streams aggregates out in order. + pub fn aggregate_by_group_using_stream( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], @@ -1641,19 +1616,36 @@ impl<'a> Segments<'a> { let aggregates = aggregates_arc.clone(); scope.spawn(move |_| { + let sorted = segment.group_key_sorted(&group_columns); + let now = std::time::Instant::now(); - segment.aggregate_by_group_with_sort( - time_range, - predicates, - &group_columns, - &aggregates, - window, - ); - log::info!( - "processed segment {:?} using multi-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) + if sorted { + segment.aggregate_by_group_using_stream( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded STREAM in {:?}", + segment.time_range(), + now.elapsed() + ) + } else { + segment.aggregate_by_group_using_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded SORT in {:?}", + segment.time_range(), + now.elapsed() + ) + } }); } }) @@ -1662,19 +1654,38 @@ impl<'a> Segments<'a> { let rem = self.segments.len() % THREADS; for segment in &self.segments[self.segments.len() - rem..] { + let group_columns = group_columns_arc.clone(); + let aggregates = aggregates_arc.clone(); + let sorted = segment.group_key_sorted(&group_columns); + let now = std::time::Instant::now(); - segment.aggregate_by_group_with_sort( - time_range, - predicates, - &group_columns_arc.clone(), - &aggregates_arc.clone(), - window, - ); - log::info!( - "processed segment {:?} using multi-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) + if sorted { + segment.aggregate_by_group_using_stream( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded STREAM in {:?}", + segment.time_range(), + now.elapsed() + ) + } else { + segment.aggregate_by_group_using_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using multi-threaded SORT in {:?}", + segment.time_range(), + now.elapsed() + ) + } } // TODO(edd): aggregate the aggregates. 
not expensive @@ -1684,19 +1695,36 @@ impl<'a> Segments<'a> { // Single threaded for segment in &self.segments { + let sorted = segment.group_key_sorted(&group_columns); + let now = std::time::Instant::now(); - segment.aggregate_by_group_with_sort( - time_range, - predicates, - &group_columns, - &aggregates, - window, - ); - log::info!( - "processed segment {:?} using single-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) + if sorted { + segment.aggregate_by_group_using_stream( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using single-threaded STREAM in {:?}", + segment.time_range(), + now.elapsed() + ) + } else { + segment.aggregate_by_group_using_sort( + time_range, + predicates, + &group_columns, + &aggregates, + window, + ); + log::info!( + "processed segment {:?} using single-threaded SORT in {:?}", + segment.time_range(), + now.elapsed() + ) + } } BTreeMap::new() @@ -1723,41 +1751,6 @@ impl<'a> Segments<'a> { min_min } - pub fn window_agg_eq( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, - aggregates: Vec<(String, AggregateType)>, - window: i64, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { - let (min, max) = time_range; - if max <= min { - panic!("max <= min"); - } - - // add time column to the group key - let mut group_columns = group_columns.clone(); - group_columns.push("time".to_string()); - - for segment in &self.segments { - let now = std::time::Instant::now(); - segment.window_aggregate_with_sort( - time_range, - predicates, - &group_columns, - &aggregates, - window, - ); - log::info!( - "processed segment {:?} using windowed single-threaded sort in {:?}", - segment.time_range(), - now.elapsed() - ) - } - BTreeMap::new() - } - /// Returns the maximum value for a column in a set of segments. 
pub fn column_max(&self, column_name: &str) -> Option { if self.segments.is_empty() { From a1d57270fd83d4548f1a9a3d49c087dcc7825f04 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 27 Aug 2020 20:41:20 +0100 Subject: [PATCH 40/73] refactor: DRY up grouped aggregates to do windowing --- delorean_mem_qe/src/bin/main.rs | 10 +- delorean_mem_qe/src/column.rs | 130 ++++--- delorean_mem_qe/src/segment.rs | 605 ++++---------------------------- 3 files changed, 164 insertions(+), 581 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 9fcc3590cc..be480e8def 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -71,7 +71,7 @@ fn main() { // time_group_single_with_pred(&store); // time_group_by_multi_agg_count(&store); // time_group_by_multi_agg_sorted_count(&store); - time_window_agg_sorted_count(&store); + time_window_agg_count(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -548,7 +548,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { } } -fn time_window_agg_sorted_count(store: &Store) { +fn time_window_agg_count(store: &Store) { let strats = vec![ // GroupingStrategy::HashGroup, // GroupingStrategy::HashGroupConcurrent, @@ -557,7 +557,7 @@ fn time_window_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 10; + let repeat = 10000; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -567,7 +567,7 @@ fn time_window_agg_sorted_count(store: &Store) { let groups = segments.read_group_eq( (1589000000000001, 1590044410000000), &[], - vec!["env".to_string(), "role".to_string(), "path".to_string()], + vec!["env".to_string(), "role".to_string()], vec![("counter".to_string(), AggregateType::Count)], 60000000 * 10, // 10 minutes, strat, @@ -577,7 +577,7 @@ fn time_window_agg_sorted_count(store: &Store) { total_max += groups.len(); } println!( - "time_window_agg_sorted_count {:?} ran {:?} in {:?} {:?} / call {:?}", + "time_window_agg_count {:?} ran {:?} in {:?} {:?} / call {:?}", strat, repeat, total_time, diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 3ce8ee0357..bc89cb23bd 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -156,35 +156,15 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { } } -// impl<'a> std::ops::Add<&Scalar<'a>> for Aggregate<'a> { -// type Output = Aggregate<'a>; - -// fn add(self, _rhs: &Scalar<'a>) -> Self::Output { -// match _rhs { -// Scalar::String(v) => {} -// Scalar::Float(v) => {} -// Scalar::Integer(v) => {} -// } -// // match self { -// // Self::Count(c) => { -// // match -// // if let Scalar::Count(other) = _rhs { -// // return Self::Count(c + other); -// // } else { -// // panic!("invalid"); -// // }; -// // } -// // Self::Sum(s) => { -// // if let Self::Sum(other) = _rhs { -// // return Self::Sum(s + other); -// // } else { -// // panic!("invalid"); -// // }; -// // } -// // } -// } -// } - +pub trait AggregatableByRange { + fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate; +} +/// A Vector is a materialised vector of values from a column. 
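The new `AggregatableByRange` trait is the hinge of this refactor: the streaming group-by only ever asks one question, "aggregate the rows from a to b", so any source that can answer it can feed the stream, whether a whole `Column` (the pre-sorted path) or a materialised `Vector` (the sort-first path). A reduced sketch of the idea, with simplified enums and a float slice standing in for both implementors:

```rust
enum AggregateType {
    Count,
    Sum,
}

#[derive(Debug)]
enum Aggregate {
    Count(u64),
    Sum(f64),
}

trait AggregatableByRange {
    fn aggregate_by_id_range(&self, agg: &AggregateType, from: usize, to: usize) -> Aggregate;
}

impl AggregatableByRange for &[f64] {
    fn aggregate_by_id_range(&self, agg: &AggregateType, from: usize, to: usize) -> Aggregate {
        match agg {
            AggregateType::Count => Aggregate::Count((to - from) as u64),
            AggregateType::Sum => Aggregate::Sum(self[from..to].iter().sum()),
        }
    }
}

// generic over the source, in the same spirit as `stream_grouped_aggregates` below
fn aggregate_run(
    col: impl AggregatableByRange,
    agg: &AggregateType,
    from: usize,
    to: usize,
) -> Aggregate {
    col.aggregate_by_id_range(agg, from, to)
}

fn main() {
    let values: &[f64] = &[1.0, 2.0, 3.0, 4.0];
    println!("{:?}", aggregate_run(values, &AggregateType::Sum, 1, 3)); // Sum(5.0)
}
```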
pub enum Vector<'a> { String(Vec<&'a Option>), Float(Vec), @@ -192,27 +172,48 @@ pub enum Vector<'a> { } impl<'a> Vector<'a> { - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn len(&self) -> usize { - match self { - Self::String(v) => v.len(), - Self::Float(v) => v.len(), - Self::Integer(v) => v.len(), + pub fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + match agg_type { + AggregateType::Count => { + Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + } + AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), } } - pub fn get(&self, i: usize) -> Scalar<'a> { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar { match self { - // FIXME(edd): SORT THIS OPTION OUT - Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), - Self::Float(v) => Scalar::Float(v[i]), - Self::Integer(v) => Scalar::Integer(v[i]), + Vector::String(_) => { + panic!("can't sum strings...."); + } + Vector::Float(values) => { + let mut res = 0.0; + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Scalar::Float(res) + } + Vector::Integer(values) => { + let mut res = 0; + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Scalar::Integer(res) + } } } + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + to_row_id - from_row_id + } + pub fn extend(&mut self, other: Self) { match self { Self::String(v) => { @@ -239,6 +240,27 @@ impl<'a> Vector<'a> { } } + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> usize { + match self { + Self::String(v) => v.len(), + Self::Float(v) => v.len(), + Self::Integer(v) => v.len(), + } + } + + pub fn get(&self, i: usize) -> Scalar<'a> { + match self { + // FIXME(edd): SORT THIS OPTION OUT + Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), + Self::Float(v) => Scalar::Float(v[i]), + Self::Integer(v) => Scalar::Integer(v[i]), + } + } + pub fn swap(&mut self, a: usize, b: usize) { match self { Self::String(v) => { @@ -254,6 +276,17 @@ impl<'a> Vector<'a> { } } +impl AggregatableByRange for &Vector<'_> { + fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + Vector::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) + } +} + /// VectorIterator allows a `Vector` to be iterated. Until vectors are drained /// Scalar values are emitted. 
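On the `TODO(edd): check asm to see if it's vectorising` above: the manual accumulation loop and the equivalent iterator form compile to the same sequential adds for floats, since floating point addition is not associative and the compiler will not reorder it into SIMD lanes on its own; reassociation would need explicit chunking or integer data. Note also that `count_by_id_range` is simply the width of the range, a row count rather than a non-null count. A micro-sketch of the two equivalent sum forms:

```rust
fn sum_loop(values: &[f64], from: usize, to: usize) -> f64 {
    let mut res = 0.0;
    for v in &values[from..to] {
        res += *v;
    }
    res
}

fn sum_iter(values: &[f64], from: usize, to: usize) -> f64 {
    values[from..to].iter().sum()
}

fn main() {
    let v = [1.0, 2.0, 4.0, 8.0];
    // identical results and, without fast-math, essentially identical codegen
    assert_eq!(sum_loop(&v, 1, 3), sum_iter(&v, 1, 3)); // both 6.0
}
```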
pub struct VectorIterator<'a> { @@ -883,6 +916,17 @@ impl Column { } } +impl AggregatableByRange for &Column { + fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate { + Column::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) + } +} + impl From<&[f64]> for Column { fn from(values: &[f64]) -> Self { Self::Float(Float::from(values)) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index fa298fe74f..a9da3a6543 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -418,7 +418,7 @@ impl Segment { group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + ) -> Vec { log::debug!("aggregate_by_group_with_sort_unsorted called"); if window > 0 { @@ -433,10 +433,9 @@ impl Segment { if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { - return BTreeMap::new(); + return vec![]; } let total_rows = &filtered_row_ids.cardinality(); - // println!("TOTAL FILTERED ROWS {:?}", total_rows); let filtered_row_ids_vec = filtered_row_ids .to_vec() @@ -508,7 +507,7 @@ impl Segment { } log::debug!("time checking sort {:?}", now.elapsed()); - let mut group_itrs = all_columns + let group_itrs = all_columns .iter() .take(group_columns.len()) // only use grouping columns .map(|vector| { @@ -520,106 +519,16 @@ impl Segment { }) .collect::>(); - let mut aggregate_itrs = all_columns + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); + for (sorted_vector, (col_name, agg_type)) in all_columns .iter() - .skip(group_columns.len()) // only use grouping columns - .map(|v| column::VectorIterator::new(v)) - .collect::>(); - - // this tracks the last seen group key row. When it changes we can emit - // the grouped aggregates. - let group_itrs_len = &group_itrs.len(); - let mut last_group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_itrs_len - 1 && window > 0 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); - - let mut curr_group_row = last_group_row.clone(); - - // this tracks the last row for each column we are aggregating. - let last_agg_row: Vec = aggregate_itrs - .iter_mut() - .map(|itr| itr.next().unwrap()) - .collect(); - - // this keeps the current cumulative aggregates for the columns we - // are aggregating. - let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates - .iter() - .zip(last_agg_row.iter()) - .map(|((col_name, agg_type), curr_agg)| { - let agg = match agg_type { - AggregateType::Count => column::Aggregate::Count(1), - AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), - }; - (col_name.clone(), agg) - }) - .collect(); - - let mut results = BTreeMap::new(); - let mut processed_rows = 1; - while processed_rows < *total_rows { - // update next group key. - let mut group_key_changed = false; - for (i, (curr_v, itr)) in curr_group_row - .iter_mut() - .zip(group_itrs.iter_mut()) - .enumerate() - { - let next_v = if i == group_itrs_len - 1 && window > 0 { - // time column - apply window function - itr.next().unwrap() / window * window - } else { - *itr.next().unwrap() - }; - if curr_v != &next_v { - group_key_changed = true; - } - *curr_v = next_v; - } - - // group key changed - emit group row and aggregates. 
- if group_key_changed { - let key = last_group_row.clone(); - results.insert(key, cum_aggregates.clone()); - - // update group key - last_group_row = curr_group_row.clone(); - - // reset cumulative aggregates - for (_, agg) in cum_aggregates.iter_mut() { - match agg { - column::Aggregate::Count(c) => { - *c = 0; - } - column::Aggregate::Sum(s) => s.reset(), - } - } - } - - // update aggregates - for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { - let (_, curr_agg) = bind.0; - let next_value = bind.1.next().unwrap(); - curr_agg.update_with(next_value); - } - - processed_rows += 1; + .skip(group_columns.len()) + .zip(aggregates.iter()) + { + aggregate_cols.push((col_name, agg_type, sorted_vector)); } - // Emit final row - results.insert(last_group_row, cum_aggregates); - - log::info!("({:?} rows processed) {:?}", processed_rows, results); - // results - BTreeMap::new() + Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) } // Executes aggregates grouping by group_columns. If window is positive then @@ -628,14 +537,14 @@ impl Segment { // `aggregate_by_group_using_stream` assumes that all columns being grouped // on are part of the overall segment sort, therefore it does no sorting or // hashing, and just streams aggregates out in order. - pub fn aggregate_by_group_using_stream( + pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { + ) -> Vec> { log::debug!("aggregate_by_group_with_sort_sorted called"); if window > 0 { @@ -650,7 +559,7 @@ impl Segment { if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { - return BTreeMap::new(); + return vec![]; } let total_rows = &filtered_row_ids.cardinality(); @@ -677,12 +586,7 @@ impl Segment { } } - let mut new_agg_cols = Vec::with_capacity(aggregates.len()); - for (column_name, agg_type) in aggregates { - new_agg_cols.push((column_name, agg_type, self.column(&column_name))); - } - - let mut group_itrs = group_column_encoded_values + let group_itrs = group_column_encoded_values .iter() .map(|vector| { if let column::Vector::Integer(v) = vector { @@ -693,6 +597,22 @@ impl Segment { }) .collect::>(); + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); + for (column_name, agg_type) in aggregates { + aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); + } + + Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) + } + + // Once the rows necessary for doing a (windowed) grouped aggregate are ready + // this method will build a result set of aggregates in a streaming way. + pub fn stream_grouped_aggregates<'a>( + mut group_itrs: Vec>, + aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, + total_rows: usize, + window: i64, + ) -> Vec> { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. let group_itrs_len = &group_itrs.len(); @@ -710,13 +630,14 @@ impl Segment { let mut curr_group_row = last_group_row.clone(); - let mut results = BTreeMap::new(); + let mut results = vec![]; + let mut processed_rows = 1; let mut group_key_start_row_id = 0; let mut group_size = 0; - while processed_rows < *total_rows { + while processed_rows < total_rows { // update next group key. 
let mut group_key_changed = false; for (i, (curr_v, itr)) in curr_group_row @@ -738,22 +659,23 @@ impl Segment { // group key changed - emit group row and aggregates. if group_key_changed { - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } + let mut group_key_aggregates = Vec::with_capacity(aggregate_cols.len()); + for (name, agg_type, vector) in &aggregate_cols { + let agg_result = vector.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + + let col_name = name.to_owned().clone(); + group_key_aggregates.push((col_name, agg_result)); } let key = last_group_row.clone(); - results.insert(key, group_key_aggregates); + results.push(GroupedAggregates { + group_key: key, + aggregates: group_key_aggregates, + }); // update group key last_group_row = curr_group_row.clone(); @@ -768,410 +690,27 @@ impl Segment { } // Emit final row - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } + let mut group_key_aggregates = Vec::with_capacity(aggregate_cols.len()); + for (name, agg_type, vector) in &aggregate_cols { + let agg_result = vector.aggregate_by_id_range( + agg_type, + group_key_start_row_id, + group_key_start_row_id + group_size, + ); + + // TODO(edd): fix weirdness + let col_name = name.to_owned().clone(); + group_key_aggregates.push((col_name, agg_result)); } - let key = last_group_row; - results.insert(key, group_key_aggregates); + results.push(GroupedAggregates { + group_key: last_group_row, + aggregates: group_key_aggregates, + }); log::info!("({:?} rows processed) {:?}", processed_rows, results); // results - BTreeMap::new() - } - - pub fn window_aggregate_with_sort( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - if self.group_key_sorted(group_columns) { - log::info!("group key is already sorted {:?}", group_columns); - self.window_aggregate_with_sort_sorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } else { - log::info!("group key needs sorting {:?}", group_columns); - self.window_aggregate_with_sort_unsorted( - time_range, - predicates, - group_columns, - aggregates, - window, - ) - } - } - - // this method assumes that the segment's columns are sorted such that a - // sort of columns is not required. 
- fn window_aggregate_with_sort_sorted( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - // filter on predicates and time - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { - filtered_row_ids = row_ids; - } else { - return BTreeMap::new(); - } - let total_rows = &filtered_row_ids.cardinality(); - - let filtered_row_ids_vec = filtered_row_ids - .to_vec() - .iter() - .map(|v| *v as usize) - .collect::>(); - - // materialise all encoded values for the matching rows in the columns - // we are grouping on and store each group as an iterator. - let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); - for group_column in group_columns { - if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); - assert_eq!( - filtered_row_ids.cardinality() as usize, - encoded_values.len() - ); - - group_column_encoded_values.push(encoded_values); - } else { - panic!("need to handle no results for filtering/grouping..."); - } - } - - let mut new_agg_cols = Vec::with_capacity(aggregates.len()); - for (column_name, agg_type) in aggregates { - new_agg_cols.push((column_name, agg_type, self.column(&column_name))); - } - - let mut group_itrs = group_column_encoded_values - .iter() - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values or time"); - } - }) - .collect::>(); - - // this tracks the last seen group key row. When it changes we can emit - // the grouped aggregates. - let group_itrs_len = &group_itrs.len(); - let mut last_group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_itrs_len - 1 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); - - let mut curr_group_row = last_group_row.clone(); - - let mut results = BTreeMap::new(); - let mut processed_rows = 1; - - let mut group_key_start_row_id = 0; - let mut group_size = 0; - - while processed_rows < *total_rows { - // update next group key. - let mut group_key_changed = false; - for (i, (curr_v, itr)) in curr_group_row - .iter_mut() - .zip(group_itrs.iter_mut()) - .enumerate() - { - let next_v = if i == group_itrs_len - 1 { - // time column - apply window function - itr.next().unwrap() / window * window - } else { - *itr.next().unwrap() - }; - if *curr_v != next_v { - group_key_changed = true; - } - *curr_v = next_v; - } - - // group key changed - emit group row and aggregates. - if group_key_changed { - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } - } - - let key = last_group_row.clone(); - results.insert(key, group_key_aggregates); - - // update group key - last_group_row = curr_group_row.clone(); - - // reset counters tracking group key row range - group_key_start_row_id = processed_rows as usize; // TODO(edd) - could be an off-by-one? 
- group_size = 0; - } - - group_size += 1; - processed_rows += 1; - } - - // Emit final row - let mut group_key_aggregates = Vec::with_capacity(aggregates.len()); - for (name, agg_type, col) in &new_agg_cols { - if let Some(c) = col { - let agg_result = c.aggregate_by_id_range( - agg_type, - group_key_start_row_id, - group_key_start_row_id + group_size, - ); - group_key_aggregates.push((name, agg_result)); - } else { - panic!("figure this out"); - } - } - - let key = last_group_row; - results.insert(key, group_key_aggregates); - - log::info!("({:?} rows processed) {:?}", processed_rows, results); - // results - BTreeMap::new() - } - - fn window_aggregate_with_sort_unsorted( - &self, - time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], - group_columns: &[String], - aggregates: &[(String, AggregateType)], - window: i64, - ) -> BTreeMap, Vec<(String, column::Aggregate)>> { - // filter on predicates and time - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { - filtered_row_ids = row_ids; - } else { - return BTreeMap::new(); - } - let total_rows = &filtered_row_ids.cardinality(); - - let filtered_row_ids_vec = filtered_row_ids - .to_vec() - .iter() - .map(|v| *v as usize) - .collect::>(); - - // materialise all encoded values for the matching rows in the columns - // we are grouping on and store each group as an iterator. - let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); - for group_column in group_columns { - if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); - assert_eq!( - filtered_row_ids.cardinality() as usize, - encoded_values.len() - ); - group_column_encoded_values.push(Some(encoded_values)); - } else { - group_column_encoded_values.push(None); - } - } - - // TODO(edd): we could do this with an iterator I expect. - // - // materialise all decoded values for the rows in the columns we are - // aggregating on. - let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); - for (column_name, _) in aggregates { - if let Some(column) = self.column(&column_name) { - let decoded_values = column.values(&filtered_row_ids_vec); - assert_eq!( - filtered_row_ids.cardinality() as usize, - decoded_values.len() - ); - aggregate_column_decoded_values.push((column_name, Some(decoded_values))); - } else { - aggregate_column_decoded_values.push((column_name, None)); - } - } - - let mut all_columns = Vec::with_capacity( - group_column_encoded_values.len() + aggregate_column_decoded_values.len(), - ); - - for gc in group_column_encoded_values { - if let Some(p) = gc { - all_columns.push(p); - } else { - panic!("need to handle no results for filtering/grouping..."); - } - } - - for ac in aggregate_column_decoded_values { - if let (_, Some(p)) = ac { - all_columns.push(p); - } else { - panic!("need to handle no results for filtering/grouping..."); - } - } - - let now = std::time::Instant::now(); - if self.group_key_sorted(&group_columns) { - panic!("This shouldn't be called!!!"); - } else { - // now sort on the first grouping columns. Right now the order doesn't matter... 
- let group_col_sort_order = &(0..group_columns.len()).collect::>(); - super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - } - log::debug!("time checking sort {:?}", now.elapsed()); - - let mut group_itrs = all_columns - .iter() - .take(group_columns.len()) // only use grouping columns - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } - }) - .collect::>(); - - let mut aggregate_itrs = all_columns - .iter() - .skip(group_columns.len()) // only use grouping columns - .map(|v| column::VectorIterator::new(v)) - .collect::>(); - - // this tracks the last seen group key row. When it changes we can emit - // the grouped aggregates. - let mut last_group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_columns.len() - 1 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); - - let mut curr_group_row = last_group_row.clone(); - - // this tracks the last row for each column we are aggregating. - let last_agg_row: Vec = aggregate_itrs - .iter_mut() - .map(|itr| itr.next().unwrap()) - .collect(); - - // this keeps the current cumulative aggregates for the columns we - // are aggregating. - let mut cum_aggregates: Vec<(String, column::Aggregate)> = aggregates - .iter() - .zip(last_agg_row.iter()) - .map(|((col_name, agg_type), curr_agg)| { - let agg = match agg_type { - AggregateType::Count => column::Aggregate::Count(1), - AggregateType::Sum => column::Aggregate::Sum(curr_agg.clone()), - }; - (col_name.clone(), agg) - }) - .collect(); - - let mut results = BTreeMap::new(); - let mut processed_rows = 1; - while processed_rows < *total_rows { - // update next group key. - let mut group_key_changed = false; - for (i, (curr_v, itr)) in curr_group_row - .iter_mut() - .zip(group_itrs.iter_mut()) - .enumerate() - { - let next_v = if i == group_columns.len() - 1 { - // time column - apply window function - itr.next().unwrap() / window * window - } else { - *itr.next().unwrap() - }; - if curr_v != &next_v { - group_key_changed = true; - } - *curr_v = next_v; - } - - // group key changed - emit group row and aggregates. 
- if group_key_changed { - let key = last_group_row.clone(); - results.insert(key, cum_aggregates.clone()); - - // update group key - last_group_row = curr_group_row.clone(); - - // reset cumulative aggregates - for (_, agg) in cum_aggregates.iter_mut() { - match agg { - column::Aggregate::Count(c) => { - *c = 0; - } - column::Aggregate::Sum(s) => s.reset(), - } - } - } - - // update aggregates - for bind in cum_aggregates.iter_mut().zip(&mut aggregate_itrs) { - let (_, curr_agg) = bind.0; - let next_value = bind.1.next().unwrap(); - curr_agg.update_with(next_value); - } - - processed_rows += 1; - } - - // Emit final row - results.insert(last_group_row, cum_aggregates); - - log::info!("({:?} rows processed) {:?}", processed_rows, results); - // results - BTreeMap::new() + vec![] } pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { @@ -1399,12 +938,6 @@ impl SegmentMetaData { } } -#[derive(Debug, Clone)] -pub enum Aggregate { - Count, - Sum, -} - pub struct Segments<'a> { segments: Vec<&'a Segment>, } @@ -1482,7 +1015,7 @@ impl<'a> Segments<'a> { aggregates: Vec<(String, AggregateType)>, window: i64, strategy: &GroupingStrategy, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); @@ -1521,7 +1054,7 @@ impl<'a> Segments<'a> { group_columns: Vec, aggregates: Vec<(String, AggregateType)>, concurrent: bool, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { if concurrent { let group_columns_arc = std::sync::Arc::new(group_columns); let aggregates_arc = std::sync::Arc::new(aggregates); @@ -1599,7 +1132,7 @@ impl<'a> Segments<'a> { aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, Aggregate), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1845,6 +1378,12 @@ pub enum GroupingStrategy { SortGroupConcurrent, } +#[derive(Debug)] +pub struct GroupedAggregates<'a> { + pub group_key: Vec, + pub aggregates: Vec<(String, column::Aggregate<'a>)>, +} + #[cfg(test)] mod test { From bbebee654af07962d94b88898c3043b80a4a510d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 28 Aug 2020 11:47:26 +0100 Subject: [PATCH 41/73] feat: support windowed aggregates with hash sort --- delorean_mem_qe/src/bin/main.rs | 20 +++--- delorean_mem_qe/src/segment.rs | 112 ++++++++++++++++++++------------ 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index be480e8def..5d021fb25b 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -69,8 +69,8 @@ fn main() { // time_sum_range(&store); // time_count_range(&store); // time_group_single_with_pred(&store); - // time_group_by_multi_agg_count(&store); - // time_group_by_multi_agg_sorted_count(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_sorted_count(&store); time_window_agg_count(&store); } @@ -120,10 +120,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 363 { - i += 1; - continue; - } + // if i < 363 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( 
rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -550,14 +550,14 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { fn time_window_agg_count(store: &Store) { let strats = vec![ - // GroupingStrategy::HashGroup, - // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::HashGroup, + GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - // GroupingStrategy::SortGroupConcurrent, + GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { - let repeat = 10000; + let repeat = 1; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index a9da3a6543..dddf06a0fe 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -227,12 +227,21 @@ impl Segment { predicates: &[(&str, Option<&column::Scalar>)], group_columns: &[String], aggregates: &[(String, AggregateType)], + window: i64, ) -> BTreeMap, Vec<(String, Option)>> { - // println!("working segment {:?}", time_range); // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. + log::debug!("aggregate_by_group_with_hash called"); + + if window > 0 { + // last column on group key should be time. + assert_eq!(group_columns[group_columns.len() - 1], "time"); + } else { + assert_ne!(group_columns[group_columns.len() - 1], "time"); + } + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -248,29 +257,20 @@ impl Segment { .iter() .map(|v| *v as usize) .collect::>(); - // println!("TOTAL FILTERED ROWS {:?}", total_rows); // materialise all encoded values for the matching rows in the columns // we are grouping on and store each group as an iterator. let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values: Vec; - if let column::Vector::Integer(vector) = - column.encoded_values(&filtered_row_ids_vec) - { - encoded_values = vector; - } else { - unimplemented!("currently you can only group on encoded string columns"); - } - + let encoded_values = column.encoded_values(&filtered_row_ids_vec); assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() ); - group_column_encoded_values.push(Some(encoded_values)); + group_column_encoded_values.push(encoded_values); } else { - group_column_encoded_values.push(None); + panic!("need to handle no results for filtering/grouping..."); } } // println!("grouped columns {:?}", group_column_encoded_values); @@ -304,9 +304,12 @@ impl Segment { // filtering stage we will just emit None. 
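One detail worth pulling out before the iterator-building code that follows: all of the windowing added in this patch reduces to truncating the time column's value to its window boundary with integer arithmetic, the `itr.next().unwrap() / window * window` expression seen throughout. A standalone sketch of that arithmetic, assuming non-negative microsecond timestamps (so that integer division floors):

// Truncate a timestamp to the start of its window. For non-negative `t`,
// integer division floors, so every t in [k*window, (k+1)*window) maps
// to k*window.
fn window_start(t: i64, window: i64) -> i64 {
    t / window * window
}

fn main() {
    // A 10 minute window in microseconds, as in the benchmarks above.
    let window = 60_000_000 * 10;
    let t = 1_589_000_123_456_789_i64;
    let start = window_start(t, window);
    assert_eq!(start % window, 0);
    assert!(start <= t && t - start < window);
    println!("{} falls in the window starting at {}", t, start);
}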
let mut group_itrs = group_column_encoded_values .iter() - .map(|x| match x { - Some(values) => Some(values.iter()), - None => None, + .map(|vector| { + if let column::Vector::Integer(v) = vector { + v.iter() + } else { + panic!("don't support grouping on non-encoded values"); + } }) .collect::>(); @@ -321,10 +324,11 @@ impl Segment { }) .collect::>(); + // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: HashMap< - Vec>, + Vec, Vec<(&String, &AggregateType, Option)>, - > = HashMap::with_capacity(30000); + > = HashMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -332,22 +336,20 @@ impl Segment { .collect(); let mut processed_rows = 0; - while processed_rows < *total_rows { - let group_row: Vec> = group_itrs - .iter_mut() - .map(|x| match x { - Some(itr) => itr.next(), - None => None, - }) - .collect(); + let group_itrs_len = &group_itrs.len(); - // let aggregate_row: Vec<(&str, Option)> = aggregate_itrs - // .iter_mut() - // .map(|&mut (col_name, ref mut itr)| match itr { - // Some(itr) => (col_name, itr.next()), - // None => (col_name, None), - // }) - // .collect(); + while processed_rows < *total_rows { + let group_row = group_itrs + .iter_mut() + .enumerate() + .map(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + return itr.next().unwrap() / window * window; + } + *itr.next().unwrap() + }) + .collect::>(); // re-use aggregate_row vector. for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { @@ -407,7 +409,7 @@ impl Segment { } processed_rows += 1; } - log::debug!("{:?}", hash_table); + log::info!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() } @@ -428,6 +430,10 @@ impl Segment { assert_ne!(group_columns[group_columns.len() - 1], "time"); } + // TODO(edd): Perf - if there is no predicate and we want entire segment + // then it will be a lot faster to not build filtered_row_ids and just + // get all encoded values for each grouping column... + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -605,8 +611,9 @@ impl Segment { Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) } - // Once the rows necessary for doing a (windowed) grouped aggregate are ready - // this method will build a result set of aggregates in a streaming way. + // Once the rows necessary for doing a (windowed) grouped aggregate are + // available and appropriately sorted this method will build a result set of + // aggregates in a streaming way. 
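Before its definition below, a condensed sketch of what `stream_grouped_aggregates` does: when rows arrive sorted by group key, a group is complete the moment the key changes, so a single pass with O(1) state per group is enough. Simplified here to one pre-sorted key column and a count aggregate (illustrative code, not the crate's API):

fn grouped_counts(sorted_keys: &[i64]) -> Vec<(i64, u64)> {
    let mut results = Vec::new();
    let mut last_key = match sorted_keys.first() {
        Some(k) => *k,
        None => return results,
    };
    let mut count = 1;
    for &key in &sorted_keys[1..] {
        if key != last_key {
            results.push((last_key, count)); // group key changed - emit row
            last_key = key;
            count = 0;
        }
        count += 1;
    }
    results.push((last_key, count)); // emit the final group
    results
}

fn main() {
    let keys = vec![1, 1, 1, 2, 2, 5];
    assert_eq!(grouped_counts(&keys), vec![(1, 3), (2, 2), (5, 1)]);
}

The callers guarantee the ordering: `aggregate_by_group_using_stream` requires the segment itself to be sorted on the group columns, while the sort-based variant materialises and sorts the columns first; hash grouping remains the fallback for unsorted data.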
pub fn stream_grouped_aggregates<'a>( mut group_itrs: Vec>, aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, @@ -1022,12 +1029,22 @@ impl<'a> Segments<'a> { } match strategy { - GroupingStrategy::HashGroup => { - self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, false) - } - GroupingStrategy::HashGroupConcurrent => { - self.read_group_eq_hash(time_range, predicates, group_columns, aggregates, true) - } + GroupingStrategy::HashGroup => self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + window, + false, + ), + GroupingStrategy::HashGroupConcurrent => self.read_group_eq_hash( + time_range, + predicates, + group_columns, + aggregates, + window, + true, + ), GroupingStrategy::SortGroup => self.read_group_eq_sort( time_range, predicates, @@ -1051,10 +1068,16 @@ impl<'a> Segments<'a> { &self, time_range: (i64, i64), predicates: &[(&str, Option<&column::Scalar>)], - group_columns: Vec, + mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, + window: i64, concurrent: bool, ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + if window > 0 { + // add time column to the group key + group_columns.push("time".to_string()); + } + if concurrent { let group_columns_arc = std::sync::Arc::new(group_columns); let aggregates_arc = std::sync::Arc::new(aggregates); @@ -1072,6 +1095,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using multi-threaded hash-grouping in {:?}", @@ -1092,6 +1116,7 @@ impl<'a> Segments<'a> { predicates, &group_columns_arc.clone(), &aggregates_arc.clone(), + window, ); log::info!( "processed segment {:?} using multi-threaded hash-grouping in {:?}", @@ -1113,6 +1138,7 @@ impl<'a> Segments<'a> { predicates, &group_columns, &aggregates, + window, ); log::info!( "processed segment {:?} using single-threaded hash-grouping in {:?}", From cfa0ef9c2302a4a343951854a3e57c5e277aada9 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 3 Sep 2020 15:53:30 +0100 Subject: [PATCH 42/73] perf: improve group sort --- delorean_mem_qe/src/segment.rs | 50 +++++++++++++++------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index dddf06a0fe..c058df01f1 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -335,21 +335,19 @@ impl Segment { .take(aggregate_itrs.len()) .collect(); - let mut processed_rows = 0; let group_itrs_len = &group_itrs.len(); + let mut group_key: Vec = vec![0; *group_itrs_len]; + let mut processed_rows = 0; while processed_rows < *total_rows { - let group_row = group_itrs - .iter_mut() - .enumerate() - .map(|(i, itr)| { - if i == group_itrs_len - 1 && window > 0 { - // time column - apply window function - return itr.next().unwrap() / window * window; - } - *itr.next().unwrap() - }) - .collect::>(); + group_itrs.iter_mut().enumerate().for_each(|(i, itr)| { + if i == group_itrs_len - 1 && window > 0 { + // time column - apply window function + group_key[i] = itr.next().unwrap() / window * window; + } else { + group_key[i] = *itr.next().unwrap(); + } + }); // re-use aggregate_row vector. for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { @@ -359,17 +357,16 @@ impl Segment { } } - // Lookup the group key in the hash map - if it's empty then insert - // a place-holder for each aggregate being executed. 
- let group_key_entry = hash_table.entry(group_row).or_insert_with(|| { - // TODO COULD BE MAP/COLLECT + // This is cheaper than allocating a key and using the entry API + if !hash_table.contains_key(&group_key) { let mut agg_results: Vec<(&String, &AggregateType, Option)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option } - agg_results - }); + hash_table.insert(group_key.clone(), agg_results); + } + let group_key_entry = hash_table.get_mut(&group_key).unwrap(); // Update aggregates - we process each row value and for each one // check which aggregates apply to it. @@ -409,7 +406,7 @@ impl Segment { } processed_rows += 1; } - log::info!("({:?} rows processed) {:?}", processed_rows, hash_table); + log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() } @@ -551,7 +548,7 @@ impl Segment { aggregates: &[(String, AggregateType)], window: i64, ) -> Vec> { - log::debug!("aggregate_by_group_with_sort_sorted called"); + log::debug!("aggregate_by_group_using_stream called"); if window > 0 { // last column on group key should be time. @@ -674,13 +671,11 @@ impl Segment { group_key_start_row_id + group_size, ); - let col_name = name.to_owned().clone(); - group_key_aggregates.push((col_name, agg_result)); + group_key_aggregates.push((*name, agg_result)); } - let key = last_group_row.clone(); results.push(GroupedAggregates { - group_key: key, + group_key: last_group_row, aggregates: group_key_aggregates, }); @@ -706,8 +701,7 @@ impl Segment { ); // TODO(edd): fix weirdness - let col_name = name.to_owned().clone(); - group_key_aggregates.push((col_name, agg_result)); + group_key_aggregates.push((*name, agg_result)); } results.push(GroupedAggregates { @@ -715,7 +709,7 @@ impl Segment { aggregates: group_key_aggregates, }); - log::info!("({:?} rows processed) {:?}", processed_rows, results); + log::debug!("({:?} rows processed) {:?}", processed_rows, results); // results vec![] } @@ -1407,7 +1401,7 @@ pub enum GroupingStrategy { #[derive(Debug)] pub struct GroupedAggregates<'a> { pub group_key: Vec, - pub aggregates: Vec<(String, column::Aggregate<'a>)>, + pub aggregates: Vec<(&'a String, column::Aggregate<'a>)>, } #[cfg(test)] From cad5e45208346528ad02cd04dcac863f90faa037 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 4 Sep 2020 12:08:22 +0100 Subject: [PATCH 43/73] perf: add ability to get all encoded values --- delorean_mem_qe/src/column.rs | 28 ++++++++++++++++++++ delorean_mem_qe/src/encoding.rs | 45 +++++++++++++++++++++++---------- delorean_mem_qe/src/segment.rs | 36 +++++++++++++++++++++----- 3 files changed, 89 insertions(+), 20 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index bc89cb23bd..b3df18edca 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -537,6 +537,22 @@ impl Column { } } + /// Materialise all of the encoded values. + pub fn all_encoded_values(&self) -> Vector { + match self { + Column::String(c) => { + let now = std::time::Instant::now(); + let v = c.all_encoded_values(); + log::debug!("time getting all encoded values {:?}", now.elapsed()); + + log::debug!("dictionary {:?}", c.data.dictionary()); + Vector::Integer(v) + } + Column::Float(c) => Vector::Float(c.all_encoded_values()), + Column::Integer(c) => Vector::Integer(c.all_encoded_values()), + } + } + /// Given an encoded value for a row, materialise and return the decoded /// version. 
/// @@ -986,6 +1002,10 @@ impl String { self.data.encoded_values(row_ids) } + pub fn all_encoded_values(&self) -> Vec { + self.data.all_encoded_values() + } + /// Return the decoded value for an encoded ID. /// /// Panics if there is no decoded value for the provided id @@ -1037,6 +1057,10 @@ impl Float { self.data.encoded_values(row_ids) } + pub fn all_encoded_values(&self) -> Vec { + self.data.all_encoded_values() + } + pub fn scan_from(&self, row_id: usize) -> &[f64] { self.data.scan_from(row_id) } @@ -1106,6 +1130,10 @@ impl Integer { self.data.encoded_values(row_ids) } + pub fn all_encoded_values(&self) -> Vec { + self.data.all_encoded_values() + } + pub fn scan_from(&self, row_id: usize) -> &[i64] { self.data.scan_from(row_id) } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index d6a865a5f1..4b057cfc96 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -68,6 +68,12 @@ where self.values(row_ids) } + /// Return all encoded values. For this encoding this is just the decoded + /// values + pub fn all_encoded_values(&self) -> Vec { + self.values.clone() + } + // TODO(edd): fix this when added NULL support pub fn scan_from_until_some(&self, _row_id: usize) -> Option { unreachable!("to remove"); @@ -485,6 +491,26 @@ impl DictionaryRLE { out } + // values materialises a vector of references to all logical values in the + // encoding. + pub fn all_values(&mut self) -> Vec> { + let mut out: Vec> = Vec::with_capacity(self.total as usize); + + // build reverse mapping. + let mut idx_value = BTreeMap::new(); + for (k, v) in &self.entry_index { + idx_value.insert(v, k); + } + assert_eq!(idx_value.len(), self.entry_index.len()); + + for (idx, rl) in &self.run_lengths { + // TODO(edd): fix unwrap - we know that the value exists in map... + let v = idx_value.get(&idx).unwrap().as_ref(); + out.extend(iter::repeat(v).take(*rl as usize)); + } + out + } + /// Return the decoded value for an encoded ID. /// /// Panics if there is no decoded value for the provided id @@ -528,22 +554,13 @@ impl DictionaryRLE { out } - // values materialises a vector of references to all logical values in the - // encoding. - pub fn all_values(&mut self) -> Vec> { - let mut out: Vec> = Vec::with_capacity(self.total as usize); - - // build reverse mapping. - let mut idx_value = BTreeMap::new(); - for (k, v) in &self.entry_index { - idx_value.insert(v, k); - } - assert_eq!(idx_value.len(), self.entry_index.len()); + // all_encoded_values materialises a vector of all encoded values for the + // column. + pub fn all_encoded_values(&self) -> Vec { + let mut out: Vec = Vec::with_capacity(self.total as usize); for (idx, rl) in &self.run_lengths { - // TODO(edd): fix unwrap - we know that the value exists in map... - let v = idx_value.get(&idx).unwrap().as_ref(); - out.extend(iter::repeat(v).take(*rl as usize)); + out.extend(iter::repeat(*idx as i64).take(*rl as usize)); } out } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index c058df01f1..f8c500593e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -228,7 +228,7 @@ impl Segment { group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(String, Option)>> { + ) -> BTreeMap, Vec<(&String, &AggregateType, Option)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. 
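The comment above is the whole hash strategy in miniature: one pass over the filtered rows, using the row's tuple of encoded group-column values as the map key and folding the row into that key's running aggregates. A stripped-down sketch, assuming two integer-encoded key columns and only a count aggregate:

use std::collections::HashMap;

// One-pass hash grouping over (env, role) pairs of encoded values.
fn hash_group_count(rows: &[(i64, i64)]) -> HashMap<Vec<i64>, u64> {
    let mut table: HashMap<Vec<i64>, u64> = HashMap::new();
    for &(env, role) in rows {
        let key = vec![env, role]; // the row's encoded group key
        *table.entry(key).or_insert(0) += 1; // running aggregate
    }
    table
}

fn main() {
    let rows = vec![(0, 1), (0, 1), (2, 1)];
    let table = hash_group_count(&rows);
    assert_eq!(table[&vec![0, 1]], 2);
    assert_eq!(table[&vec![2, 1]], 1);
}

The sketch uses the `entry` API for brevity; patch 42 above deliberately avoids it in the hot loop, because a `contains_key` check on a reused key buffer proved cheaper than allocating an owned key for every row.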
@@ -242,6 +242,10 @@ impl Segment { assert_ne!(group_columns[group_columns.len() - 1], "time"); } + // TODO(edd): Perf - if there is no predicate and we want entire segment + // then it will be a lot faster to not build filtered_row_ids and just + // get all encoded values for each grouping column... + // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -263,7 +267,12 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); + let encoded_values = if filtered_row_ids_vec.len() == self.meta.rows { + column.all_encoded_values() + } else { + column.encoded_values(&filtered_row_ids_vec) + }; + assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() @@ -325,10 +334,10 @@ impl Segment { .collect::>(); // hashMap is about 20% faster than BTreeMap in this case - let mut hash_table: HashMap< + let mut hash_table: BTreeMap< Vec, Vec<(&String, &AggregateType, Option)>, - > = HashMap::new(); + > = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -406,8 +415,10 @@ impl Segment { } processed_rows += 1; } + // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() + // hash_table } pub fn aggregate_by_group_using_sort( @@ -451,7 +462,11 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); + let encoded_values = if filtered_row_ids_vec.len() == self.meta.rows { + column.all_encoded_values() + } else { + column.encoded_values(&filtered_row_ids_vec) + }; assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() @@ -557,6 +572,10 @@ impl Segment { assert_ne!(group_columns[group_columns.len() - 1], "time"); } + // TODO(edd): Perf - if there is no predicate and we want entire segment + // then it will be a lot faster to not build filtered_row_ids and just + // get all encoded values for each grouping column... 
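The primitive that TODO would lean on, `all_encoded_values` (added earlier in this patch), is pure run-length expansion for a dictionary-RLE column: each stored `(dictionary index, run length)` pair becomes that many consecutive encoded ids, with no per-row-id lookups. A minimal standalone sketch of the expansion:

use std::iter;

// Expand (dictionary index, run length) pairs into one encoded id per row,
// mirroring DictionaryRLE::all_encoded_values above.
fn expand_rle(run_lengths: &[(usize, u64)]) -> Vec<i64> {
    let total: u64 = run_lengths.iter().map(|(_, rl)| rl).sum();
    let mut out = Vec::with_capacity(total as usize);
    for (idx, rl) in run_lengths {
        out.extend(iter::repeat(*idx as i64).take(*rl as usize));
    }
    out
}

fn main() {
    let rle: Vec<(usize, u64)> = vec![(0, 2), (1, 3), (0, 1)];
    assert_eq!(expand_rle(&rle), vec![0, 0, 1, 1, 1, 0]);
}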
+ // filter on predicates and time let filtered_row_ids: croaring::Bitmap; if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { @@ -577,7 +596,11 @@ impl Segment { let mut group_column_encoded_values = Vec::with_capacity(group_columns.len()); for group_column in group_columns { if let Some(column) = self.column(&group_column) { - let encoded_values = column.encoded_values(&filtered_row_ids_vec); + let encoded_values = if filtered_row_ids_vec.len() == self.meta.rows { + column.all_encoded_values() + } else { + column.encoded_values(&filtered_row_ids_vec) + }; assert_eq!( filtered_row_ids.cardinality() as usize, encoded_values.len() @@ -709,6 +732,7 @@ impl Segment { aggregates: group_key_aggregates, }); + // println!("groups: {:?}", results.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, results); // results vec![] From d3fd2c36290cc9257ace84c1c633d6d390743b59 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 4 Sep 2020 15:58:04 +0100 Subject: [PATCH 44/73] feat: wip group by * --- delorean_mem_qe/src/bin/main.rs | 84 ++++++++++++++++++++++++++++----- delorean_mem_qe/src/segment.rs | 33 +++++++------ 2 files changed, 92 insertions(+), 25 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 5d021fb25b..d4b810467d 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -63,15 +63,16 @@ fn main() { ); let store = Arc::new(store); - // time_select_with_pred(&store); - // time_datafusion_select_with_pred(store.clone()); - // time_first_host(&store); - // time_sum_range(&store); - // time_count_range(&store); - // time_group_single_with_pred(&store); + time_select_with_pred(&store); + time_datafusion_select_with_pred(store.clone()); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); time_group_by_multi_agg_count(&store); time_group_by_multi_agg_sorted_count(&store); time_window_agg_count(&store); + // time_group_by_different_columns(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -94,7 +95,7 @@ fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> } fn build_arrow_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { - let r = File::open(Path::new("/Users/edd/work/InfluxData/delorean_misc/in-memory-sort/env_role_path_time/http_api_requests_total.arrow")).unwrap(); + let r = File::open(Path::new(path)).unwrap(); let file_size = fs::metadata(&path).expect("read metadata").len(); println!( "Reading {} ({}) bytes of Arrow from {:?}....", @@ -120,7 +121,7 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - // if i < 363 { + // if i < 364 { // i += 1; // continue; // } @@ -467,9 +468,9 @@ fn time_group_single_with_pred(store: &Store) { fn time_group_by_multi_agg_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, - GroupingStrategy::HashGroupConcurrent, + // GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - GroupingStrategy::SortGroupConcurrent, + // GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { @@ -518,7 +519,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 10; + let repeat = 1; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); @@ -586,3 
+587,64 @@ fn time_window_agg_count(store: &Store) {
         );
     }
 }
+
+// This is for a performance experiment where I wanted to show the performance
+// change as more columns are grouped on.
+//
+// This only shows good performance when the input file is ordered on all of the
+// columns below.
+fn time_group_by_different_columns(store: &Store) {
+    let strats = vec![
+        GroupingStrategy::HashGroup,
+        GroupingStrategy::HashGroupConcurrent,
+        GroupingStrategy::SortGroup,
+        GroupingStrategy::SortGroupConcurrent,
+    ];
+
+    let cols = vec![
+        "status".to_string(),
+        "method".to_string(),
+        "url".to_string(),
+        "env".to_string(),
+        "handler".to_string(),
+        "role".to_string(),
+        "user_agent".to_string(),
+        "path".to_string(),
+        "nodename".to_string(),
+        "host".to_string(),
+        "hostname".to_string(),
+    ];
+
+    for strat in &strats {
+        let repeat = 10;
+        let mut total_time: std::time::Duration = std::time::Duration::new(0, 0);
+        let mut total_max = 0;
+        let segments = store.segments();
+
+        for i in 1..=cols.len() {
+            for _ in 0..repeat {
+                let now = std::time::Instant::now();
+
+                let groups = segments.read_group_eq(
+                    (1589000000000001, 1590044410000000),
+                    &[],
+                    cols[0..i].to_vec(),
+                    vec![("counter".to_string(), AggregateType::Count)],
+                    0,
+                    strat,
+                );
+
+                total_time += now.elapsed();
+                total_max += groups.len();
+            }
+            println!(
+                "time_group_by_different_columns{:?} cols: {:?} ran {:?} in {:?} {:?}",
+                strat,
+                i,
+                repeat,
+                total_time,
+                total_time / repeat,
+            );
+        }
+    }
+}
diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs
index f8c500593e..c06e9d6bca 100644
--- a/delorean_mem_qe/src/segment.rs
+++ b/delorean_mem_qe/src/segment.rs
@@ -221,14 +221,14 @@ impl Segment {
         true
     }
 
-    pub fn aggregate_by_group_with_hash(
+    pub fn aggregate_by_group_with_hash<'a>(
         &self,
         time_range: (i64, i64),
         predicates: &[(&str, Option<&column::Scalar>)],
         group_columns: &[String],
-        aggregates: &[(String, AggregateType)],
+        aggregates: &'a [(String, AggregateType)],
         window: i64,
-    ) -> BTreeMap, Vec<(&String, &AggregateType, Option)>> {
+    ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option)>> {
         // Build a hash table - essentially, scan columns for matching row ids,
         // emitting the encoded value for each column and track those value
         // combinations in a hashmap with running aggregates.
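The `<'a>` threading in the hunk above exists so the returned map can borrow each aggregate's name and type straight from the `aggregates` slice rather than cloning `String`s per group. A minimal sketch of the pattern, with hypothetical names:

use std::collections::BTreeMap;

// `'a` ties the borrowed keys in the result to the `names` slice, so the
// output can be built without cloning any String.
fn index_by_name<'a>(names: &'a [String]) -> BTreeMap<&'a String, usize> {
    let mut out = BTreeMap::new();
    for (i, name) in names.iter().enumerate() {
        out.insert(name, i);
    }
    out
}

fn main() {
    let names = vec!["counter".to_string(), "requests".to_string()];
    let index = index_by_name(&names);
    assert_eq!(index.get(&"counter".to_string()), Some(&0));
}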
let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { + let column_name: &'a String = column_name; + if let Some(column) = self.column(&column_name) { let decoded_values = column.values(&filtered_row_ids_vec); assert_eq!( @@ -336,7 +338,7 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&String, &AggregateType, Option)>, + Vec<(&'a String, &'a AggregateType, Option)>, > = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = @@ -368,8 +370,11 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&String, &AggregateType, Option)> = - Vec::with_capacity(aggregates.len()); + let mut agg_results: Vec<( + &'a String, + &'a AggregateType, + Option, + )> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option } @@ -417,8 +422,8 @@ impl Segment { } // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - BTreeMap::new() - // hash_table + // BTreeMap::new() + hash_table } pub fn aggregate_by_group_using_sort( @@ -1097,14 +1102,14 @@ impl<'a> Segments<'a> { } if concurrent { - let group_columns_arc = std::sync::Arc::new(group_columns); - let aggregates_arc = std::sync::Arc::new(aggregates); + // let group_columns_arc = std::sync::Arc::new(group_columns); + // let aggregates_arc = std::sync::Arc::new(aggregates); for chunked_segments in self.segments.chunks(THREADS) { crossbeam::scope(|scope| { for segment in chunked_segments { - let group_columns = group_columns_arc.clone(); - let aggregates = aggregates_arc.clone(); + // let group_columns = group_columns_arc.clone(); + // let aggregates = aggregates_arc.clone(); scope.spawn(move |_| { let now = std::time::Instant::now(); @@ -1132,8 +1137,8 @@ impl<'a> Segments<'a> { segment.aggregate_by_group_with_hash( time_range, predicates, - &group_columns_arc.clone(), - &aggregates_arc.clone(), + &group_columns, + &aggregates, window, ); log::info!( From 551f6c3c78049aa147c33e92dfed60c7c5e6005f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 7 Sep 2020 11:20:08 +0100 Subject: [PATCH 45/73] refactor: cleanup --- delorean_mem_qe/src/column.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index b3df18edca..89345800a5 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -54,17 +54,17 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { match self { Self::Float(v) => { if let Self::Float(other) = _rhs { - return Self::Float(v + other); + Self::Float(v + other) } else { panic!("invalid"); - }; + } } Self::Integer(v) => { if let Self::Integer(other) = _rhs { - return Self::Integer(v + other); + Self::Integer(v + other) } else { panic!("invalid"); - }; + } } Self::String(_) => { unreachable!("not possible to add strings"); @@ -140,17 +140,17 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { match self { Self::Count(c) => { if let Self::Count(other) = _rhs { - return Self::Count(c + other); + Self::Count(c + other) } else { panic!("invalid"); - }; + } } Self::Sum(s) => { if let Self::Sum(other) = _rhs { - return Self::Sum(s + other); + Self::Sum(s + other) } else { panic!("invalid"); - }; + } } } } @@ -360,7 +360,7 @@ 
impl Column { /// Materialise the decoded value matching the provided logical /// row id. - pub fn value(&self, row_id: usize) -> Option { + pub fn value(&self, row_id: usize) -> Option> { match self { Column::String(c) => { if row_id >= self.num_rows() { From 9cb18fd94216c97d930679c4582578d03571f0da Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 7 Sep 2020 17:42:50 +0100 Subject: [PATCH 46/73] refactor: address lifetimes --- delorean_mem_qe/src/adapter.rs | 2 +- delorean_mem_qe/src/column.rs | 92 +++++++++++++++++----------------- delorean_mem_qe/src/lib.rs | 3 +- delorean_mem_qe/src/segment.rs | 16 +++--- delorean_mem_qe/src/sorter.rs | 12 ++--- 5 files changed, 63 insertions(+), 62 deletions(-) diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs index 0fcb5cb34a..e7fed4a721 100644 --- a/delorean_mem_qe/src/adapter.rs +++ b/delorean_mem_qe/src/adapter.rs @@ -181,7 +181,7 @@ impl LogicalPlanNode for SegmentScan { } /// Write a single line human readable string to `f` for use in explain plan - fn format_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn format_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "SegmentScan: {:?} predicate {:?}", diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 89345800a5..7d7618d92b 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -162,7 +162,7 @@ pub trait AggregatableByRange { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate; + ) -> Aggregate<'_>; } /// A Vector is a materialised vector of values from a column. pub enum Vector<'a> { @@ -177,7 +177,7 @@ impl<'a> Vector<'a> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'a> { match agg_type { AggregateType::Count => { Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) @@ -186,7 +186,7 @@ impl<'a> Vector<'a> { } } - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { match self { Vector::String(_) => { panic!("can't sum strings...."); @@ -282,7 +282,7 @@ impl AggregatableByRange for &Vector<'_> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'_> { Vector::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } @@ -389,7 +389,7 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids. - pub fn values(&self, row_ids: &[usize]) -> Vector { + pub fn values(&self, row_ids: &[usize]) -> Vector<'_> { match self { Column::String(c) => { if row_ids.is_empty() { @@ -424,7 +424,7 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids within the bitmap - pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { match self { Column::String(c) => { if row_ids.is_empty() { @@ -467,7 +467,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. - pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -506,7 +506,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. 
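The signature changes that follow are typical of this patch: once `#![deny(rust_2018_idioms)]` lands in lib.rs (later in the same patch), any return type that silently borrows from `self` must spell the elided lifetime out, as in `Vector<'_>`. A tiny self-contained illustration of the lint:

#![deny(rust_2018_idioms)]

// A borrowing type, standing in for column::Vector<'a>.
struct View<'a> {
    values: &'a [i64],
}

struct Store {
    values: Vec<i64>,
}

impl Store {
    // Writing `-> View` here would trip elided_lifetimes_in_paths, part of
    // the rust_2018_idioms group; `View<'_>` makes the borrow visible.
    fn view(&self) -> View<'_> {
        View {
            values: &self.values,
        }
    }
}

fn main() {
    let store = Store { values: vec![1, 2, 3] };
    assert_eq!(store.view().values.len(), 3);
}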
- pub fn encoded_values(&self, row_ids: &[usize]) -> Vector { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vector<'_> { match self { Column::String(c) => { if row_ids.is_empty() { @@ -538,7 +538,7 @@ impl Column { } /// Materialise all of the encoded values. - pub fn all_encoded_values(&self) -> Vector { + pub fn all_encoded_values(&self) -> Vector<'_> { match self { Column::String(c) => { let now = std::time::Instant::now(); @@ -574,7 +574,7 @@ impl Column { } /// materialise rows for each row_id - pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -596,33 +596,33 @@ impl Column { } } - /// materialise all rows including and after row_id - pub fn scan_from(&self, _row_id: usize) -> Option { - unimplemented!("todo"); - // if row_id >= self.num_rows() { - // println!( - // "asking for {:?} but only got {:?} rows", - // row_id, - // self.num_rows() - // ); - // return None; - // } + // /// materialise all rows including and after row_id + // pub fn scan_from(&self, _row_id: usize) -> Option { + // unimplemented!("todo"); + // // if row_id >= self.num_rows() { + // // println!( + // // "asking for {:?} but only got {:?} rows", + // // row_id, + // // self.num_rows() + // // ); + // // return None; + // // } - // println!( - // "asking for {:?} with a column having {:?} rows", - // row_id, - // self.num_rows() - // ); - // match self { - // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), - // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), - // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), - // } - } + // // println!( + // // "asking for {:?} with a column having {:?} rows", + // // row_id, + // // self.num_rows() + // // ); + // // match self { + // // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), + // // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), + // // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), + // // } + // } /// Given the provided row_id scans the column until a non-null value found /// or the column is exhausted. - pub fn scan_from_until_some(&self, row_id: usize) -> Option { + pub fn scan_from_until_some(&self, row_id: usize) -> Option> { match self { Column::String(c) => { if row_id >= self.num_rows() { @@ -655,7 +655,7 @@ impl Column { } } - pub fn maybe_contains(&self, value: Option<&Scalar>) -> bool { + pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { match self { Column::String(c) => match value { Some(scalar) => { @@ -685,7 +685,7 @@ impl Column { } /// returns true if the column cannot contain - pub fn max_less_than(&self, value: Option<&Scalar>) -> bool { + pub fn max_less_than(&self, value: Option<&Scalar<'_>>) -> bool { match self { Column::String(c) => match value { Some(scalar) => { @@ -714,7 +714,7 @@ impl Column { } } - pub fn min_greater_than(&self, value: Option<&Scalar>) -> bool { + pub fn min_greater_than(&self, value: Option<&Scalar<'_>>) -> bool { match self { Column::String(c) => match value { Some(scalar) => { @@ -745,7 +745,7 @@ impl Column { /// Returns the minimum value contained within this column. // FIXME(edd): Support NULL integers and floats - pub fn min(&self) -> Option { + pub fn min(&self) -> Option> { match self { Column::String(c) => { if let Some(min) = c.meta.range().0 { @@ -760,7 +760,7 @@ impl Column { /// Returns the maximum value contained within this column. 
// FIXME(edd): Support NULL integers and floats - pub fn max(&self) -> Option { + pub fn max(&self) -> Option> { match self { Column::String(c) => { if let Some(max) = c.meta.range().1 { @@ -773,7 +773,7 @@ impl Column { } } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option> { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), @@ -786,7 +786,7 @@ impl Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'_> { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match agg_type { @@ -811,21 +811,21 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - pub fn row_ids_eq(&self, value: Option<&Scalar>) -> Option { + pub fn row_ids_eq(&self, value: Option<&Scalar<'_>>) -> Option { if !self.maybe_contains(value) { return None; } self.row_ids(value, std::cmp::Ordering::Equal) } - pub fn row_ids_gt(&self, value: Option<&Scalar>) -> Option { + pub fn row_ids_gt(&self, value: Option<&Scalar<'_>>) -> Option { if self.max_less_than(value) { return None; } self.row_ids(value, std::cmp::Ordering::Greater) } - pub fn row_ids_lt(&self, value: Option<&Scalar>) -> Option { + pub fn row_ids_lt(&self, value: Option<&Scalar<'_>>) -> Option { if self.min_greater_than(value) { return None; } @@ -838,7 +838,7 @@ impl Column { // or // // WHERE counter >= 102.2 AND counter < 2929.32 - pub fn row_ids_gte_lt(&self, low: &Scalar, high: &Scalar) -> Option { + pub fn row_ids_gte_lt(&self, low: &Scalar<'_>, high: &Scalar<'_>) -> Option { match self { Column::String(_c) => { unimplemented!("not implemented yet"); @@ -895,7 +895,7 @@ impl Column { // TODO(edd) shouldn't let roaring stuff leak out... fn row_ids( &self, - value: Option<&Scalar>, + value: Option<&Scalar<'_>>, order: std::cmp::Ordering, ) -> Option { match self { @@ -938,7 +938,7 @@ impl AggregatableByRange for &Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate { + ) -> Aggregate<'_> { Column::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index c8617e1cf0..4176d3cdd8 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,3 +1,4 @@ +#![deny(rust_2018_idioms)] pub mod adapter; pub mod column; pub mod encoding; @@ -30,7 +31,7 @@ impl Store { self.segments.len() } - pub fn segments(&self) -> Segments { + pub fn segments(&self) -> Segments<'_> { Segments::new(self.segments.iter().collect::>()) } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index c06e9d6bca..207951823e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -145,12 +145,12 @@ impl Segment { column_sizes } - pub fn scan_column_from(&self, column_name: &str, row_id: usize) -> Option { - if let Some(i) = self.column_names().iter().position(|c| c == column_name) { - return self.columns[i].scan_from(row_id); - } - None - } + // pub fn scan_column_from(&self, column_name: &str, row_id: usize) -> Option { + // if let Some(i) = self.column_names().iter().position(|c| c == column_name) { + // return self.columns[i].scan_from(row_id); + // } + // None + // } // Materialise all rows for each desired column. 
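// `AggregatableByRange` (implemented for `&Column` above, and for `&Vector`
// earlier) exists so aggregation code can run over either an encoded column
// or an already-materialised vector. A sketch of a consumer, assuming the
// trait as defined in this patch (the helper itself is hypothetical):
//
//     fn count_per_window(input: impl AggregatableByRange, window: usize, rows: usize) -> Vec<u64> {
//         let mut counts = Vec::new();
//         let mut from = 0;
//         while from < rows {
//             let to = std::cmp::min(from + window, rows);
//             // the input only needs to support aggregation over a row-id range
//             if let Aggregate::Count(n) =
//                 input.aggregate_by_id_range(&AggregateType::Count, from, to)
//             {
//                 counts.push(n);
//             }
//             from = to;
//         }
//         counts
//     }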
// @@ -1108,8 +1108,8 @@ impl<'a> Segments<'a> { for chunked_segments in self.segments.chunks(THREADS) { crossbeam::scope(|scope| { for segment in chunked_segments { - // let group_columns = group_columns_arc.clone(); - // let aggregates = aggregates_arc.clone(); + let group_columns = &group_columns; + let aggregates = &aggregates; scope.spawn(move |_| { let now = std::time::Instant::now(); diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index c8b01a3432..fb592b87ad 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -43,7 +43,7 @@ const SORTED_CHECK_SIZE: usize = 1000; /// /// All chosen columns will be sorted in ascending order; the sort is *not* /// stable. -pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Error> { +pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(), Error> { if vectors.is_empty() || sort_by.is_empty() { return Ok(()); } @@ -87,7 +87,7 @@ pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Err Ok(()) } -fn quicksort_by(vectors: &mut [column::Vector], range: Range, sort_by: &[usize]) { +fn quicksort_by(vectors: &mut [column::Vector<'_>], range: Range, sort_by: &[usize]) { if range.start >= range.end { return; } @@ -97,7 +97,7 @@ fn quicksort_by(vectors: &mut [column::Vector], range: Range, sort_by: &[ quicksort_by(vectors, pivot + 1..range.end, sort_by); } -fn partition(vectors: &mut [column::Vector], range: &Range, sort_by: &[usize]) -> usize { +fn partition(vectors: &mut [column::Vector<'_>], range: &Range, sort_by: &[usize]) -> usize { let pivot = (range.start + range.end) / 2; let (lo, hi) = (range.start, range.end); if cmp(vectors, pivot as usize, lo as usize, sort_by) == Ordering::Less { @@ -133,7 +133,7 @@ fn partition(vectors: &mut [column::Vector], range: &Range, sort_by: &[us } } -fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ordering { +fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering { for &idx in sort_by { match &vectors[idx] { column::Vector::String(p) => { @@ -157,7 +157,7 @@ fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ord } #[allow(dead_code)] -fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool { +fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usize]) -> bool { 'row_wise: for i in 1..len { for &idx in sort_by { match &vectors[idx] { @@ -191,7 +191,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) } // Swap the same pair of elements in each packer column -fn swap(vectors: &mut [column::Vector], a: usize, b: usize) { +fn swap(vectors: &mut [column::Vector<'_>], a: usize, b: usize) { for p in vectors { p.swap(a, b); } From b0e0676f61e5f4ebe8a554bf4d9f52ee2860b4ad Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 7 Sep 2020 17:47:32 +0100 Subject: [PATCH 47/73] refactor: address lifetimes --- delorean_mem_qe/src/segment.rs | 71 ++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 207951823e..77936fab85 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -160,8 +160,8 @@ impl Segment { &self, row_ids: &croaring::Bitmap, columns: &[String], - ) -> BTreeMap { - let mut rows: BTreeMap = BTreeMap::new(); + ) -> BTreeMap> { + let mut rows: BTreeMap> = BTreeMap::new(); if 
row_ids.is_empty() { // nothing to return return rows; @@ -224,11 +224,12 @@ impl Segment { pub fn aggregate_by_group_with_hash<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'a>>)], group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option)>> { + ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option>)>> + { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -338,10 +339,10 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&'a String, &'a AggregateType, Option)>, + Vec<(&'a String, &'a AggregateType, Option>)>, > = BTreeMap::new(); - let mut aggregate_row: Vec<(&str, Option)> = + let mut aggregate_row: Vec<(&str, Option>)> = std::iter::repeat_with(|| ("", None)) .take(aggregate_itrs.len()) .collect(); @@ -373,7 +374,7 @@ impl Segment { let mut agg_results: Vec<( &'a String, &'a AggregateType, - Option, + Option>, )> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option @@ -429,11 +430,11 @@ impl Segment { pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> Vec { + ) -> Vec> { log::debug!("aggregate_by_group_with_sort_unsorted called"); if window > 0 { @@ -563,7 +564,7 @@ impl Segment { pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -640,7 +641,7 @@ impl Segment { // available and appropriately sorted this method will build a result set of // aggregates in a streaming way. pub fn stream_grouped_aggregates<'a>( - mut group_itrs: Vec>, + mut group_itrs: Vec>, aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, total_rows: usize, window: i64, @@ -743,7 +744,11 @@ impl Segment { vec![] } - pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { + pub fn sum_column( + &self, + name: &str, + row_ids: &mut croaring::Bitmap, + ) -> Option> { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); } @@ -765,7 +770,7 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range @@ -783,7 +788,7 @@ impl Segment { fn filter_by_predicates_eq_time( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option<&column::Scalar>)>, + predicates: Vec<(&str, Option<&column::Scalar<'_>>)>, ) -> Option { // Get all row_ids matching the time range: // @@ -821,7 +826,7 @@ impl Segment { // meta row_ids bitmap. 
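// The hash-grouping strategy above reduces to: form a composite key from each
// row's encoded group-column values and keep one running aggregate per
// distinct key. Its core, as a hypothetical standalone helper (the real code
// carries a `Vec` of named `Aggregate`s per key instead of a plain sum):
//
//     fn group_sum(group_keys: &[Vec<i64>], values: &[f64]) -> std::collections::BTreeMap<Vec<i64>, f64> {
//         let mut table = std::collections::BTreeMap::new();
//         for (key, v) in group_keys.iter().zip(values) {
//             // one entry per distinct combination of encoded group values
//             *table.entry(key.clone()).or_insert(0.0) += *v;
//         }
//         table
//     }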
fn filter_by_predicates_eq_no_time( &self, - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], ) -> Option { if predicates.is_empty() { // In this case there are no predicates provided and we have no time @@ -865,10 +870,10 @@ impl Segment { pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_column: &String, aggregates: &Vec<(String, column::AggregateType)>, - ) -> BTreeMap> { + ) -> BTreeMap)>> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -884,7 +889,7 @@ impl Segment { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { // First calculate all of the aggregates for this grouped value - let mut aggs: Vec<((String, AggregateType), column::Aggregate)> = + let mut aggs: Vec<((String, AggregateType), column::Aggregate<'_>)> = Vec::with_capacity(aggregates.len()); for (col_name, agg) in aggregates { @@ -1004,15 +1009,15 @@ impl<'a> Segments<'a> { pub fn read_filter_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], select_columns: Vec, - ) -> BTreeMap { + ) -> BTreeMap> { let (min, max) = time_range; if max <= min { panic!("max <= min"); } - let mut columns: BTreeMap = BTreeMap::new(); + let mut columns: BTreeMap> = BTreeMap::new(); for segment in &self.segments { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range @@ -1040,12 +1045,12 @@ impl<'a> Segments<'a> { pub fn read_group_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, strategy: &GroupingStrategy, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); @@ -1090,12 +1095,12 @@ impl<'a> Segments<'a> { fn read_group_eq_hash( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1176,12 +1181,12 @@ impl<'a> Segments<'a> { fn read_group_eq_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar>)], + predicates: &[(&str, Option<&column::Scalar<'_>>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1313,12 +1318,12 @@ impl<'a> Segments<'a> { } /// Returns the minimum value for a column in a set of segments. 
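// `filter_by_predicates_eq_no_time` above is the usual intersection pattern:
// each `column = value` predicate yields a row-id bitmap and the bitmaps are
// AND-ed together, bailing out as soon as the intersection becomes empty.
// The same shape in isolation (hypothetical helper, assuming croaring 0.4's
// `and_inplace`/`is_empty`):
//
//     fn intersect_all(mut sets: Vec<croaring::Bitmap>) -> Option<croaring::Bitmap> {
//         let mut result = sets.pop()?;
//         for s in &sets {
//             result.and_inplace(s);
//             if result.is_empty() {
//                 return None; // some predicate matches no rows at all
//             }
//         }
//         Some(result)
//     }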
-    pub fn column_min(&self, column_name: &str) -> Option<Scalar> {
+    pub fn column_min(&self, column_name: &str) -> Option<Scalar<'_>> {
         if self.segments.is_empty() {
             return None;
         }

-        let mut min_min: Option<Scalar> = None;
+        let mut min_min: Option<Scalar<'_>> = None;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let min = segment.columns[i].min();
@@ -1334,12 +1339,12 @@ impl<'a> Segments<'a> {
     }

     /// Returns the maximum value for a column in a set of segments.
-    pub fn column_max(&self, column_name: &str) -> Option<Scalar> {
+    pub fn column_max(&self, column_name: &str) -> Option<Scalar<'_>> {
         if self.segments.is_empty() {
             return None;
         }

-        let mut max_max: Option<Scalar> = None;
+        let mut max_max: Option<Scalar<'_>> = None;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let max = segment.columns[i].max();
@@ -1362,7 +1367,7 @@ impl<'a> Segments<'a> {
     /// If the time column has multiple max time values then the result is arbitrary.
     ///
     /// TODO(edd): could return NULL value..
-    pub fn first(&self, column_name: &str) -> Option<(i64, Option<Scalar>, usize)> {
+    pub fn first(&self, column_name: &str) -> Option<(i64, Option<Scalar<'_>>, usize)> {
         // First let's find the segment with the earliest time range.
         // notice we order a < b on max time range.
         let segment = self
@@ -1394,7 +1399,7 @@ impl<'a> Segments<'a> {
     /// If the time column has multiple max time values then the result is arbitrary.
     ///
     /// TODO(edd): could return NULL value..
-    pub fn last(&self, column_name: &str) -> Option<(i64, Option<Scalar<'_>>, usize)> {
+    pub fn last(&self, column_name: &str) -> Option<(i64, Option<Scalar<'_>>, usize)> {
         // First let's find the segment with the latest time range.
         // notice we order a > b on max time range.
         let segment = self

From e511c5fc4a59a2b0abc13ce83e36b22631245235 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Mon, 7 Sep 2020 18:39:57 +0100
Subject: [PATCH 48/73] refactor: stuck

---
 delorean_mem_qe/src/adapter.rs |  2 +-
 delorean_mem_qe/src/lib.rs     |  2 ++
 delorean_mem_qe/src/segment.rs | 12 ++++++------
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs
index e7fed4a721..a0fdab6af6 100644
--- a/delorean_mem_qe/src/adapter.rs
+++ b/delorean_mem_qe/src/adapter.rs
@@ -149,7 +149,7 @@ struct SegmentScan {
     predicate: Expr,
 }

-impl SegmentScan {
+impl<'a> SegmentScan {
     fn new(store: Arc<Store>, predicate: Expr) -> Self {
         let schema = store.schema().clone();

diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs
index 4176d3cdd8..29bb987eac 100644
--- a/delorean_mem_qe/src/lib.rs
+++ b/delorean_mem_qe/src/lib.rs
@@ -32,6 +32,8 @@ impl Store {
     }

     pub fn segments(&self) -> Segments<'_> {
+        // let iter: std::slice::Iter<'a, Segment> = self.segments.iter();
+        // let segments = iter.collect::<Vec<&Segment>>();
         Segments::new(self.segments.iter().collect::<Vec<&Segment>>())
     }

diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs
index 77936fab85..26e69a181b 100644
--- a/delorean_mem_qe/src/segment.rs
+++ b/delorean_mem_qe/src/segment.rs
@@ -1045,12 +1045,12 @@ impl<'a> Segments<'a> {
     pub fn read_group_eq(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<&column::Scalar<'_>>)],
+        predicates: &[(&str, Option<&column::Scalar<'a>>)],
         group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         strategy: &GroupingStrategy,
-    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> {
+    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
        let (min, max) = time_range;
        if max <= min {
            panic!("max <= min");
        }
@@ -1095,12 +1095,12 @@ impl<'a> Segments<'a> {
     fn read_group_eq_hash(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<&column::Scalar<'_>>)],
+        predicates: &[(&str, Option<&column::Scalar<'a>>)],
         mut group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         concurrent: bool,
-    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> {
+    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
         if window > 0 {
             // add time column to the group key
             group_columns.push("time".to_string());
@@ -1181,12 +1181,12 @@
     fn read_group_eq_sort(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<&column::Scalar<'_>>)],
+        predicates: &[(&str, Option<&column::Scalar<'a>>)],
         mut group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         concurrent: bool,
-    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'_>)>> {
+    ) -> BTreeMap<Vec<String>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
         if window > 0 {
             // add time column to the group key
             group_columns.push("time".to_string());

From 3dd41cb71ddb2b401743dc17cc4f7d55284c515b Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Tue, 8 Sep 2020 15:44:05 +0100
Subject: [PATCH 49/73] refactor: tidy encoding

---
 delorean_mem_qe/src/encoding.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs
index 4b057cfc96..32af51f4a7 100644
--- a/delorean_mem_qe/src/encoding.rs
+++ b/delorean_mem_qe/src/encoding.rs
@@ -18,8 +18,9 @@ impl PlainFixedOption {
 // No compression
 pub struct PlainFixed<T> {
     values: Vec<T>,
-    buf: Vec<T>,
-    total_order: bool, // if true the column is totally ordered ascending.
+    // total_order can be used as a hint to stop scanning the column early when
+    // applying a comparison predicate to the column.
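// A sketch of the early exit this hint enables (hypothetical code, not part
// of this patch): once values are known to be sorted ascending, a
// `>= from, < to` scan can stop at the first value that reaches `to`:
//
//     for (i, v) in self.values.iter().enumerate() {
//         if self.total_order && *v >= to {
//             break; // no later row can fall inside [from, to)
//         }
//         if *v >= from && *v < to {
//             bm.add(i as u32);
//         }
//     }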
+ total_order: bool, } impl PlainFixed @@ -245,7 +246,7 @@ impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { Self { values: v.to_vec(), - buf: Vec::with_capacity(v.len()), + // buf: Vec::with_capacity(v.len()), total_order: false, } } @@ -255,7 +256,7 @@ impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { Self { values: v.to_vec(), - buf: Vec::with_capacity(v.len()), + // buf: Vec::with_capacity(v.len()), total_order: false, } } From 9a3e0d24a3ddca99288050797c43be8c84d40e0f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 8 Sep 2020 18:51:16 +0100 Subject: [PATCH 50/73] refactor: cruft --- delorean_mem_qe/src/column.rs | 73 +-------------------------------- delorean_mem_qe/src/encoding.rs | 9 ---- delorean_mem_qe/src/segment.rs | 12 +++--- 3 files changed, 6 insertions(+), 88 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 7d7618d92b..45070387c7 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -595,65 +595,7 @@ impl Column { Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), } } - - // /// materialise all rows including and after row_id - // pub fn scan_from(&self, _row_id: usize) -> Option { - // unimplemented!("todo"); - // // if row_id >= self.num_rows() { - // // println!( - // // "asking for {:?} but only got {:?} rows", - // // row_id, - // // self.num_rows() - // // ); - // // return None; - // // } - - // // println!( - // // "asking for {:?} with a column having {:?} rows", - // // row_id, - // // self.num_rows() - // // ); - // // match self { - // // Column::String(c) => Some(Vector::String(c.scan_from(row_id))), - // // Column::Float(c) => Some(Vector::Float(c.scan_from(row_id))), - // // Column::Integer(c) => Some(Vector::Integer(c.scan_from(row_id))), - // // } - // } - - /// Given the provided row_id scans the column until a non-null value found - /// or the column is exhausted. - pub fn scan_from_until_some(&self, row_id: usize) -> Option> { - match self { - Column::String(c) => { - if row_id >= self.num_rows() { - return None; - } - - match c.scan_from_until_some(row_id) { - Some(v) => Some(Scalar::String(v)), - None => None, - } - } - Column::Float(c) => { - if row_id >= self.num_rows() { - return None; - } - match c.scan_from_until_some(row_id) { - Some(v) => Some(Scalar::Float(v)), - None => None, - } - } - Column::Integer(c) => { - if row_id >= self.num_rows() { - return None; - } - match c.scan_from_until_some(row_id) { - Some(v) => Some(Scalar::Integer(v)), - None => None, - } - } - } - } +} pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { match self { @@ -1017,11 +959,6 @@ impl String { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, _row_id: usize) -> Option<&std::string::String> { - unreachable!("don't need this"); - // self.data.scan_from_until_some(row_id) - } - // TODO(edd) shouldn't let roaring stuff leak out... pub fn group_row_ids(&self) -> &std::collections::BTreeMap { self.data.group_row_ids() @@ -1065,10 +1002,6 @@ impl Float { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, row_id: usize) -> Option { - self.data.scan_from_until_some(row_id) - } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { self.data.sum_by_ids(row_ids) } @@ -1138,10 +1071,6 @@ impl Integer { self.data.scan_from(row_id) } - pub fn scan_from_until_some(&self, row_id: usize) -> Option { - self.data.scan_from_until_some(row_id) - } - /// Find the first logical row that contains this value. 
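// The `From<&[i64]>` constructor below derives the column's (min, max) range
// in a single pass at build time, which is what keeps the later pruning
// checks O(1). That pass in isolation (hypothetical helper):
//
//     fn min_max(values: &[i64]) -> Option<(i64, i64)> {
//         let first = *values.first()?;
//         let mut range = (first, first);
//         for &v in &values[1..] {
//             range.0 = range.0.min(v);
//             range.1 = range.1.max(v);
//         }
//         Some(range)
//     }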
pub fn row_id_eq_value(&self, v: i64) -> Option { if !self.meta.maybe_contains_value(v) { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 32af51f4a7..1bed37f027 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -75,15 +75,6 @@ where self.values.clone() } - // TODO(edd): fix this when added NULL support - pub fn scan_from_until_some(&self, _row_id: usize) -> Option { - unreachable!("to remove"); - // for v in self.values.iter().skip(row_id) { - // return Some(*v); - // } - // None - } - pub fn scan_from(&self, row_id: usize) -> &[T] { &self.values[row_id..] } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 26e69a181b..47e673b28f 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -944,24 +944,22 @@ pub struct SegmentMetaData { column_names: Vec, time_range: (i64, i64), - // row_ids is a bitmap containing all row ids. - row_ids: croaring::Bitmap, + // row_ids: croaring::Bitmap, // TODO column sort order } impl SegmentMetaData { pub fn new(rows: usize, schema: Schema) -> Self { - let mut meta = Self { + Self { size: 0, rows, schema, column_names: vec![], time_range: (0, 0), - row_ids: croaring::Bitmap::create_with_capacity(rows as u32), - }; - meta.row_ids.add_range(0..rows as u64); - meta + // row_ids: croaring::Bitmap::create_with_capacity(rows as u32), + } + // meta.row_ids.add_range(0..rows as u64); } pub fn schema(&self) -> SchemaRef { From e5f9c7c57433a3a3d47175001757a70d3702118a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 10 Sep 2020 21:07:28 +0100 Subject: [PATCH 51/73] refactor: add encoding trait --- Cargo.lock | 40 +++++++-- delorean_mem_qe/src/bin/main.rs | 8 +- delorean_mem_qe/src/column.rs | 22 ++--- delorean_mem_qe/src/encoding.rs | 151 ++++++++++++++++++++++++++++++-- delorean_mem_qe/src/segment.rs | 4 +- 5 files changed, 190 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed5f18fdfb..6c92c9ccb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,7 +87,11 @@ checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" +<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" +======= +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" +>>>>>>> 27b73c4... 
refactor: add encoding trait dependencies = [ "chrono", "csv", @@ -107,7 +111,7 @@ dependencies = [ [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" dependencies = [ "chrono", "csv", @@ -665,7 +669,27 @@ dependencies = [ [[package]] name = "datafusion" version = "2.0.0-SNAPSHOT" +<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" +======= +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "clap", + "crossbeam", + "fnv", + "num_cpus", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "paste", + "rustyline", + "sqlparser", +] + +[[package]] +name = "datafusion" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" +>>>>>>> 27b73c4... refactor: add encoding trait dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "chrono", @@ -689,7 +713,11 @@ dependencies = [ "clap", "criterion", "csv", +<<<<<<< HEAD "delorean_arrow", +======= + "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", +>>>>>>> 27b73c4... 
refactor: add encoding trait "delorean_generated_types", "delorean_ingest", "delorean_line_parser", @@ -784,16 +812,16 @@ dependencies = [ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "chrono", "croaring", "crossbeam", - "datafusion", + "datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "delorean_table", "env_logger", "human_format", "log", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "snafu", ] @@ -2073,9 +2101,9 @@ dependencies = [ [[package]] name = "parquet" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a#aa6889a74c57d6faea0d27ea8013d9b0c7ef809a" +source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=aa6889a74c57d6faea0d27ea8013d9b0c7ef809a)", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", "brotli", "byteorder", "chrono", diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index d4b810467d..b172338da8 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - // if i < 364 { - // i += 1; - // continue; - // } + if i < 364 { + i += 1; + continue; + } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 45070387c7..fcef27c1b8 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -595,7 +595,6 @@ impl Column { Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), } } -} pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { match self { @@ -965,12 +964,13 @@ impl String { } } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Float { meta: metadata::F64, // TODO(edd): compression of float columns - data: encoding::PlainFixed, + // data: encoding::PlainFixed, + data: Box>, } impl Float { @@ -1029,17 +1029,17 @@ impl From<&[f64]> for Float { Self { meta: metadata::F64::new((min, max), len), - data: encoding::PlainFixed::from(values), + data: Box::new(encoding::PlainFixed::from(values)), } } } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct Integer { meta: metadata::I64, // TODO(edd): compression of integers - data: encoding::PlainFixed, + data: Box>, } impl Integer { @@ -1078,14 +1078,6 @@ impl Integer { } self.data.row_id_eq_value(v) } - - /// Find the first logical row that contains a value >= v - pub fn row_id_ge_value(&self, v: i64) -> Option { - if self.meta.max() < v { - return None; - } - self.data.row_id_ge_value(v) - } } impl From<&[i64]> for Integer { @@ -1102,7 +1094,7 @@ impl From<&[i64]> for Integer { Self { meta: metadata::I64::new((min, max), len), - data: encoding::PlainFixed::from(values), + data: 
Box::new(encoding::PlainFixed::from(values)), } } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 1bed37f027..9c04c2d609 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,16 +1,39 @@ use std::collections::{BTreeMap, BTreeSet}; use std::iter; -// TODO(edd): this is just for convenience. In reality one would store nulls -// separately and not use `Option`. -#[derive(Debug, Default)] -pub struct PlainFixedOption { - values: Vec>, +use arrow::array::Array; + +pub trait NumericEncoding: Send + Sync { + type Item; + + fn size(&self) -> usize; + fn value(&self, row_id: usize) -> Self::Item; + fn values(&self, row_ids: &[usize]) -> Vec; + fn encoded_values(&self, row_ids: &[usize]) -> Vec; + fn all_encoded_values(&self) -> Vec; + fn scan_from(&self, row_id: usize) -> &[Self::Item]; + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item; + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item; + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + + fn row_id_eq_value(&self, v: Self::Item) -> Option; + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap; + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; } -impl PlainFixedOption { - pub fn size(&self) -> usize { - self.values.len() * std::mem::size_of::>() +impl std::fmt::Debug for dyn NumericEncoding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", "todo") + } +} + +impl std::fmt::Debug for dyn NumericEncoding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", "todo") } } @@ -253,6 +276,118 @@ impl From<&[f64]> for PlainFixed { } } +impl NumericEncoding for PlainFixed { + type Item = f64; + + fn size(&self) -> usize { + self.size() + } + + fn value(&self, row_id: usize) -> Self::Item { + self.value(row_id) + } + + fn values(&self, row_ids: &[usize]) -> Vec { + self.values(row_ids) + } + + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + self.encoded_values(row_ids) + } + + fn all_encoded_values(&self) -> Vec { + self.all_encoded_values() + } + + fn scan_from(&self, row_id: usize) -> &[Self::Item] { + self.scan_from(row_id) + } + + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { + self.sum_by_ids(row_ids) + } + + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { + self.sum_by_id_range(from_row_id, to_row_id) + } + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.count_by_id_range(from_row_id, to_row_id) + } + + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { + self.row_ids_single_cmp_roaring(wanted, order) + } + + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { + self.row_ids_gte_lt_roaring(from, to) + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + self.row_id_eq_value(v) + } +} + +impl NumericEncoding for PlainFixed { + type Item = i64; + + fn size(&self) -> usize { + self.size() + } + + fn value(&self, row_id: usize) -> Self::Item { + self.value(row_id) + } + + fn values(&self, row_ids: &[usize]) -> Vec { + self.values(row_ids) + } + + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + self.encoded_values(row_ids) + } + + fn all_encoded_values(&self) -> Vec { + self.all_encoded_values() + } + + fn 
scan_from(&self, row_id: usize) -> &[Self::Item] { + self.scan_from(row_id) + } + + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { + self.sum_by_ids(row_ids) + } + + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { + self.sum_by_id_range(from_row_id, to_row_id) + } + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.count_by_id_range(from_row_id, to_row_id) + } + + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { + self.row_ids_single_cmp_roaring(wanted, order) + } + + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { + self.row_ids_gte_lt_roaring(from, to) + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + self.row_id_eq_value(v) + } +} + #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 47e673b28f..9a15b03430 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -423,8 +423,8 @@ impl Segment { } // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - // BTreeMap::new() - hash_table + BTreeMap::new() + // hash_table } pub fn aggregate_by_group_using_sort( From 9f299461ed428f3ec612e80de7776791a9dd42ba Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Sep 2020 11:11:38 +0100 Subject: [PATCH 52/73] feat: working on arrow backing --- delorean_mem_qe/src/encoding.rs | 214 +++++++++++++++++++++++++++++++- 1 file changed, 213 insertions(+), 1 deletion(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 9c04c2d609..e19d533306 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,7 +1,38 @@ use std::collections::{BTreeMap, BTreeSet}; use std::iter; -use arrow::array::Array; +use arrow::array::{Array, PrimitiveArray}; +use arrow::datatypes::ArrowNumericType; +use arrow::datatypes::*; + +pub trait Encoding: Send + Sync { + type Item; + + fn size(&self) -> usize; + fn value(&self, row_id: usize) -> Self::Item; + fn values(&self, row_ids: &[usize]) -> Vec; + fn encoded_values(&self, row_ids: &[usize]) -> Vec; + fn all_encoded_values(&self) -> Vec; + fn scan_from(&self, row_id: usize) -> &[Self::Item]; + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + + // TODO(edd): clean up the API for getting row ids that match predicates. + // + // Ideally you should be able to provide a collection of predicates to + // match on. + // + // A simpler approach would be to provide a method that matches on a single + // predicate and then call that multiple times, unioning or intersecting the + // resulting row sets. 
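// For example, `x > 10 AND x < 20` could be answered against any
// implementation by intersecting two single-comparison results (hypothetical
// composition, using the methods declared below and croaring's `and`):
//
//     let gt = enc.row_ids_single_cmp_roaring(&10, std::cmp::Ordering::Greater);
//     let lt = enc.row_ids_single_cmp_roaring(&20, std::cmp::Ordering::Less);
//     let matching = gt.and(&lt); // `or` would give the union for OR predicates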
+ fn row_id_eq_value(&self, v: Self::Item) -> Option; + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap; + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; +} pub trait NumericEncoding: Send + Sync { type Item; @@ -37,6 +68,173 @@ impl std::fmt::Debug for dyn NumericEncoding { } } +pub struct PlainArrow +where + // T: ArrowNumericType + std::ops::Add, + T: ArrowNumericType, + // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, +{ + arr: PrimitiveArray, + // _phantom: T, +} + +impl PlainArrow +where + // T: ArrowNumericType + std::ops::Add, + T: ArrowNumericType, + // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, +{ + pub fn size(&self) -> usize { + self.arr.len() + } + + pub fn value(&self, row_id: usize) -> Option { + if self.arr.is_null(row_id) { + return None; + } + Some(self.arr.value(row_id)) + } + + fn values(&self, row_ids: &[usize]) -> Vec> { + let mut out = Vec::with_capacity(row_ids.len()); + for &row_id in row_ids { + if self.arr.is_null(row_id) { + out.push(None) + } else { + out.push(Some(self.arr.value(row_id))) + } + } + assert_eq!(out.len(), row_ids.len()); + out + } + + /// Well this is terribly slow + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { + self.values(row_ids) + } + + /// TODO(edd): there must be a more efficient way. + pub fn all_encoded_values(&self) -> Vec> { + let mut out = Vec::with_capacity(self.arr.len()); + for i in 0..self.arr.len() { + if self.arr.is_null(i) { + out.push(None) + } else { + out.push(Some(self.arr.value(i))) + } + } + assert_eq!(out.len(), self.arr.len()); + out + } + + pub fn scan_from(&self, row_id: usize) -> &[Option] { + // todo + + &[] + + // let mut out = Vec::with_capacity(self.arr.len() - row_id); + // for i in row_id..self.arr.len() { + // if self.arr.is_null(i) { + // out.push(None) + // } else { + // out.push(Some(self.arr.value(i))) + // } + // } + // assert_eq!(out.len(), self.arr.len()); + // out.as_slice() + } + + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + // let mut res = T::Native::default(); + + // // HMMMMM - materialising which has a memory cost. + // let vec = row_ids.to_vec(); + // for v in vec { + // res += self.arr.value(v as usize); + // } + None // todo + } + + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { + // if the column contains a null value between the range then the result + // will be None. + for i in from_row_id..to_row_id { + if self.arr.is_null(i) { + return None; + } + } + + // Otherwise sum all the values between in the range. + // let mut res = f64::from(self.arr.value(from_row_id)); + // for i in from_row_id + 1..to_row_id { + // res = res + self.arr.value(i); + // } + // Some(res) + None + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + // TODO - count values that are not null in the row range. 
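// In the Arrow representation the information needed for that TODO is already
// tracked in the array's validity (null) bitmap, so a null-aware count can be
// written as below; the rewrite later in this series fills the method in the
// same way (sketch, using arrow's `Array::is_null` as elsewhere in this file):
//
//     let mut count = 0;
//     for i in from_row_id..to_row_id {
//         if !self.arr.is_null(i) {
//             count += 1;
//         }
//     }
//     count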
+ 0 // todo + } +} + +impl NumericEncoding for PlainArrow { + type Item = Option; + + fn size(&self) -> usize { + self.size() + } + + fn value(&self, row_id: usize) -> Self::Item { + self.value(row_id) + } + + fn values(&self, row_ids: &[usize]) -> Vec { + self.values(row_ids) + } + + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + self.encoded_values(row_ids) + } + + fn all_encoded_values(&self) -> Vec { + self.all_encoded_values() + } + + fn scan_from(&self, row_id: usize) -> &[Self::Item] { + self.scan_from(row_id) + } + + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { + self.sum_by_ids(row_ids) + } + + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { + self.sum_by_id_range(from_row_id, to_row_id) + } + + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.count_by_id_range(from_row_id, to_row_id) + } + + fn row_ids_single_cmp_roaring( + &self, + wanted: &Self::Item, + order: std::cmp::Ordering, + ) -> croaring::Bitmap { + self.row_ids_single_cmp_roaring(wanted, order) + } + + fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { + self.row_ids_gte_lt_roaring(from, to) + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + self.row_id_eq_value(v) + } +} + #[derive(Debug, Default)] // No compression pub struct PlainFixed { @@ -761,6 +959,20 @@ impl std::convert::From<&delorean_table::Packer> for #[cfg(test)] mod test { + + #[test] + fn plain_arrow() { + let col = super::PlainArrow { + arr: super::PrimitiveArray::from(vec![Some(2.3), Some(44.56), None]), + }; + + let encoded = col.all_encoded_values(); + assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); + + let sum = col.sum_by_id_range(0, 1); + assert_eq!(sum, Some(46.86)); + } + #[test] fn plain_row_ids_roaring_eq() { let input = vec![1, 1, 1, 1, 3, 4, 4, 5, 6, 5, 5, 5, 1, 5]; From 47b2f7940b4817589ec09bbcd88bfe0245afeeb5 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Sep 2020 13:39:02 +0100 Subject: [PATCH 53/73] refactor: spike on arrow encoding --- Cargo.lock | 10 + delorean_mem_qe/Cargo.toml | 4 +- delorean_mem_qe/src/bin/main.rs | 1 + delorean_mem_qe/src/column.rs | 96 +++++++- delorean_mem_qe/src/encoding.rs | 419 +++++++++++++------------------- delorean_mem_qe/src/segment.rs | 15 ++ 6 files changed, 281 insertions(+), 264 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c92c9ccb3..63e340c752 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -819,6 +819,7 @@ dependencies = [ "datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "delorean_table", "env_logger", + "heapsize", "human_format", "log", "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", @@ -1356,6 +1357,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "heapsize" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1679e6ea370dee694f91f1dc469bf94cf8f52051d147aec3e1f9497c6fc22461" +dependencies = [ + "winapi 0.3.8", +] + [[package]] name = "heck" version = "0.3.1" diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index c2086ac66f..742df5ba19 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -12,9 +12,7 @@ delorean_table = { path = "../delorean_table" } arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } parquet = { git = 
"https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } -#arrow = { path = "/Users/alamb/Software/arrow/rust/arrow" } -#parquet = { path = "/Users/alamb/Software/arrow/rust/parquet" } -#datafusion = { path = "/Users/alamb/Software/arrow/rust/datafusion" } +heapsize = "0.4.2" snafu = "0.6.8" croaring = "0.4.5" crossbeam = "0.7.3" diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index b172338da8..90acdd167f 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -134,6 +134,7 @@ fn build_store( let mut segment = Segment::new(rb.num_rows(), schema); convert_record_batch(rb, &mut segment)?; + println!("{}", &segment); store.add_segment(segment); } Ok(None) => { diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index fcef27c1b8..e0b5df55dc 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -873,6 +873,23 @@ impl Column { } } +impl std::fmt::Display for Column { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + Column::String(c) => { + write!(f, "{}", c)?; + } + Column::Float(c) => { + write!(f, "{}", c)?; + } + Column::Integer(c) => { + write!(f, "{}", c)?; + } + } + Ok(()) + } +} + impl AggregatableByRange for &Column { fn aggregate_by_id_range( &self, @@ -964,6 +981,12 @@ impl String { } } +impl std::fmt::Display for String { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) + } +} + #[derive(Debug)] pub struct Float { meta: metadata::F64, @@ -1015,6 +1038,12 @@ impl Float { } } +impl std::fmt::Display for Float { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) + } +} + impl From<&[f64]> for Float { fn from(values: &[f64]) -> Self { let len = values.len(); @@ -1034,6 +1063,32 @@ impl From<&[f64]> for Float { } } +// use arrow::array::Array; +// impl From> for Float { +// fn from(arr: arrow::array::PrimitiveArray) -> Self { +// let len = arr.len(); +// let mut min = std::f64::MAX; +// let mut max = std::f64::MIN; + +// // calculate min/max for meta data +// // TODO(edd): can use compute kernels for this. +// for i in 0..arr.len() { +// if arr.is_null(i) { +// continue; +// } + +// let v = arr.value(i); +// min = min.min(v); +// max = max.max(v); +// } + +// Self { +// meta: metadata::F64::new((min, max), len), +// data: Box::new(encoding::PlainArrow { arr }), +// } +// } +// } + #[derive(Debug)] pub struct Integer { meta: metadata::I64, @@ -1080,6 +1135,12 @@ impl Integer { } } +impl std::fmt::Display for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) + } +} + impl From<&[i64]> for Integer { fn from(values: &[i64]) -> Self { let len = values.len(); @@ -1100,11 +1161,12 @@ impl From<&[i64]> for Integer { } pub mod metadata { + use std::mem::size_of; + #[derive(Debug, Default)] pub struct Str { range: (Option, Option), num_rows: usize, - // sparse_index: BTreeMap, } impl Str { @@ -1145,8 +1207,20 @@ pub mod metadata { } pub fn size(&self) -> usize { - // TODO!!!! 
- 0 //self.range.0.len() + self.range.1.len() + std::mem::size_of::() + // size of types for num_rows and range + let base_size = size_of::() + (2 * size_of::>()); + match &self.range { + (None, None) => base_size, + (Some(min), None) => base_size + min.len(), + (None, Some(max)) => base_size + max.len(), + (Some(min), Some(max)) => base_size + min.len() + max.len(), + } + } + } + + impl std::fmt::Display for Str { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Range: ({:?})", self.range) } } @@ -1184,7 +1258,13 @@ pub mod metadata { } pub fn size(&self) -> usize { - std::mem::size_of::<(f64, f64)>() + std::mem::size_of::() + size_of::() + (size_of::<(f64, f64)>()) + } + } + + impl std::fmt::Display for F64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Range: ({:?})", self.range) } } @@ -1219,7 +1299,13 @@ pub mod metadata { } pub fn size(&self) -> usize { - std::mem::size_of::<(i64, i64)>() + std::mem::size_of::() + size_of::() + (size_of::<(i64, i64)>()) + } + } + + impl std::fmt::Display for I64 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Range: ({:?})", self.range) } } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e19d533306..54b90b0a72 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,11 +1,11 @@ use std::collections::{BTreeMap, BTreeSet}; use std::iter; +use std::mem::size_of; use arrow::array::{Array, PrimitiveArray}; use arrow::datatypes::ArrowNumericType; -use arrow::datatypes::*; -pub trait Encoding: Send + Sync { +pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { type Item; fn size(&self) -> usize; @@ -15,39 +15,15 @@ pub trait Encoding: Send + Sync { fn all_encoded_values(&self) -> Vec; fn scan_from(&self, row_id: usize) -> &[Self::Item]; - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; - - // TODO(edd): clean up the API for getting row ids that match predicates. - // - // Ideally you should be able to provide a collection of predicates to - // match on. - // - // A simpler approach would be to provide a method that matches on a single - // predicate and then call that multiple times, unioning or intersecting the - // resulting row sets. 
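// The `Item` associated type survives this consolidation onto
// `NumericEncoding`: callers stay generic over the physical encoding while
// the logical value type is pinned down. For example (hypothetical helper):
//
//     fn first_match<E: NumericEncoding<Item = i64>>(enc: &E, v: i64) -> Option<usize> {
//         enc.row_id_eq_value(v)
//     }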
- fn row_id_eq_value(&self, v: Self::Item) -> Option; - fn row_ids_single_cmp_roaring( - &self, - wanted: &Self::Item, - order: std::cmp::Ordering, - ) -> croaring::Bitmap; - fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; -} - -pub trait NumericEncoding: Send + Sync { - type Item; - - fn size(&self) -> usize; - fn value(&self, row_id: usize) -> Self::Item; - fn values(&self, row_ids: &[usize]) -> Vec; - fn encoded_values(&self, row_ids: &[usize]) -> Vec; - fn all_encoded_values(&self) -> Vec; - fn scan_from(&self, row_id: usize) -> &[Self::Item]; fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item; fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item; + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64; fn row_id_eq_value(&self, v: Self::Item) -> Option; + fn row_id_ge_value(&self, v: Self::Item) -> Option; + fn row_ids_single_cmp_roaring( &self, wanted: &Self::Item, @@ -56,39 +32,37 @@ pub trait NumericEncoding: Send + Sync { fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap; } -impl std::fmt::Debug for dyn NumericEncoding { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", "todo") - } -} - -impl std::fmt::Debug for dyn NumericEncoding { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", "todo") - } -} - +#[derive(Debug)] pub struct PlainArrow where - // T: ArrowNumericType + std::ops::Add, T: ArrowNumericType, - // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, { arr: PrimitiveArray, - // _phantom: T, } -impl PlainArrow +impl NumericEncoding for PlainArrow where - // T: ArrowNumericType + std::ops::Add, - T: ArrowNumericType, - // T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, + T: ArrowNumericType + std::fmt::Debug, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, { - pub fn size(&self) -> usize { + type Item = Option; + + fn size(&self) -> usize { self.arr.len() } - pub fn value(&self, row_id: usize) -> Option { + fn value(&self, row_id: usize) -> Option { if self.arr.is_null(row_id) { return None; } @@ -109,12 +83,12 @@ where } /// Well this is terribly slow - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { + fn encoded_values(&self, row_ids: &[usize]) -> Vec> { self.values(row_ids) } /// TODO(edd): there must be a more efficient way. - pub fn all_encoded_values(&self) -> Vec> { + fn all_encoded_values(&self) -> Vec> { let mut out = Vec::with_capacity(self.arr.len()); for i in 0..self.arr.len() { if self.arr.is_null(i) { @@ -127,11 +101,10 @@ where out } - pub fn scan_from(&self, row_id: usize) -> &[Option] { - // todo - - &[] - + // TODO(edd): problem here is returning a slice because we need to own the + // backing vector. + fn scan_from(&self, row_id: usize) -> &[Option] { + unimplemented!("need to figure out returning a slice"); // let mut out = Vec::with_capacity(self.arr.len() - row_id); // for i in row_id..self.arr.len() { // if self.arr.is_null(i) { @@ -144,18 +117,22 @@ where // out.as_slice() } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { - // let mut res = T::Native::default(); - - // // HMMMMM - materialising which has a memory cost. 
- // let vec = row_ids.to_vec(); - // for v in vec { - // res += self.arr.value(v as usize); - // } - None // todo + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + // TODO(edd): this is expensive - may pay to expose method to do this + // where you accept an array. + let mut res = T::Native::default(); + let vec = row_ids.to_vec(); + for row_id in vec { + let i = row_id as usize; + if self.arr.is_null(i) { + return None; + } + res = res + self.arr.value(i); + } + Some(res) } - pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { // if the column contains a null value between the range then the result // will be None. for i in from_row_id..to_row_id { @@ -165,57 +142,35 @@ where } // Otherwise sum all the values between in the range. - // let mut res = f64::from(self.arr.value(from_row_id)); - // for i in from_row_id + 1..to_row_id { - // res = res + self.arr.value(i); - // } - // Some(res) - None - } - - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - // TODO - count values that are not null in the row range. - 0 // todo - } -} - -impl NumericEncoding for PlainArrow { - type Item = Option; - - fn size(&self) -> usize { - self.size() - } - - fn value(&self, row_id: usize) -> Self::Item { - self.value(row_id) - } - - fn values(&self, row_ids: &[usize]) -> Vec { - self.values(row_ids) - } - - fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.encoded_values(row_ids) - } - - fn all_encoded_values(&self) -> Vec { - self.all_encoded_values() - } - - fn scan_from(&self, row_id: usize) -> &[Self::Item] { - self.scan_from(row_id) - } - - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { - self.sum_by_ids(row_ids) - } - - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { - self.sum_by_id_range(from_row_id, to_row_id) + let mut res = T::Native::default(); + for i in from_row_id..to_row_id { + res = res + self.arr.value(i); + } + Some(res) } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.count_by_id_range(from_row_id, to_row_id) + // TODO - count values that are not null in the row range. + let mut count = 0; + for i in from_row_id..to_row_id { + if self.arr.is_null(i) { + continue; + } + count += 1; + } + count + } + + fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + todo!() + } + + fn row_id_eq_value(&self, v: Self::Item) -> Option { + todo!() + } + + fn row_id_ge_value(&self, v: Self::Item) -> Option { + todo!() } fn row_ids_single_cmp_roaring( @@ -223,15 +178,26 @@ impl NumericEncoding for PlainArrow { wanted: &Self::Item, order: std::cmp::Ordering, ) -> croaring::Bitmap { - self.row_ids_single_cmp_roaring(wanted, order) + todo!() } fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { - self.row_ids_gte_lt_roaring(from, to) + todo!() } +} - fn row_id_eq_value(&self, v: Self::Item) -> Option { - self.row_id_eq_value(v) +impl std::fmt::Display for PlainArrow +where + T: ArrowNumericType + std::fmt::Debug, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[PlainArrow] size: {}", self.size()) } } @@ -242,31 +208,60 @@ pub struct PlainFixed { // total_order can be used as a hint to stop scanning the column early when // applying a comparison predicate to the column. 
total_order: bool, + + size: usize, } -impl PlainFixed +impl std::fmt::Display for PlainFixed where - T: Default + PartialEq + PartialOrd + Copy + std::fmt::Debug + std::ops::AddAssign, + T: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::fmt::Display + + Sync + + Send + + std::ops::AddAssign, { - pub fn size(&self) -> usize { - self.values.len() * std::mem::size_of::() + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[PlainFixed] size: {}", self.size(),) + } +} + +impl NumericEncoding for PlainFixed +where + T: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::fmt::Display + + Sync + + Send + + std::ops::AddAssign, +{ + type Item = T; + + fn size(&self) -> usize { + self.size } - pub fn row_id_eq_value(&self, v: T) -> Option { + fn row_id_eq_value(&self, v: T) -> Option { self.values.iter().position(|x| *x == v) } - pub fn row_id_ge_value(&self, v: T) -> Option { + fn row_id_ge_value(&self, v: T) -> Option { self.values.iter().position(|x| *x >= v) } // get value at row_id. Panics if out of bounds. - pub fn value(&self, row_id: usize) -> T { + fn value(&self, row_id: usize) -> T { self.values[row_id] } /// Return the decoded values for the provided logical row ids. - pub fn values(&self, row_ids: &[usize]) -> Vec { + fn values(&self, row_ids: &[usize]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); for chunks in row_ids.chunks_exact(4) { out.push(self.values[chunks[3]]); @@ -286,24 +281,24 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.values(row_ids) } /// Return all encoded values. For this encoding this is just the decoded /// values - pub fn all_encoded_values(&self) -> Vec { + fn all_encoded_values(&self) -> Vec { self.values.clone() } - pub fn scan_from(&self, row_id: usize) -> &[T] { + fn scan_from(&self, row_id: usize) -> &[T] { &self.values[row_id..] } /// returns a set of row ids that match a single ordering on a desired value /// /// This supports `value = x` , `value < x` or `value > x`. - pub fn row_ids_single_cmp_roaring( + fn row_ids_single_cmp_roaring( &self, wanted: &T, order: std::cmp::Ordering, @@ -343,7 +338,7 @@ where /// returns a set of row ids that match the half open interval `[from, to)`. /// /// The main use-case for this is time range filtering. 
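// A `time >= from AND time < to` predicate maps directly onto this half-open
// interval. Hypothetical usage, with nanosecond timestamps:
//
//     let row_ids = time_col.row_ids_gte_lt_roaring(&1_596_100_000_000_000_000,
//                                                   &1_596_200_000_000_000_000);
//     // row_ids now holds every row whose time t satisfies from <= t < to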
- pub fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap { + fn row_ids_gte_lt_roaring(&self, from: &T, to: &T) -> croaring::Bitmap { let mut bm = croaring::Bitmap::create(); let mut found = false; //self.values[0]; @@ -376,7 +371,7 @@ where bm } - pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { let mut res = T::default(); for v in self.values[from_row_id..to_row_id].iter() { res += *v; @@ -384,12 +379,12 @@ where res } - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { to_row_id - from_row_id } // TODO(edd): make faster - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { let mut res = T::default(); // println!( // "cardinality is {:?} out of {:?}", @@ -449,7 +444,7 @@ where res } - pub fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { row_ids.cardinality() } } @@ -460,6 +455,10 @@ impl From<&[i64]> for PlainFixed { values: v.to_vec(), // buf: Vec::with_capacity(v.len()), total_order: false, + size: size_of::>() + + (size_of::() * v.len()) + + size_of::() + + size_of::(), } } } @@ -470,122 +469,14 @@ impl From<&[f64]> for PlainFixed { values: v.to_vec(), // buf: Vec::with_capacity(v.len()), total_order: false, + size: size_of::>() + + (size_of::() * v.len()) + + size_of::() + + size_of::(), } } } -impl NumericEncoding for PlainFixed { - type Item = f64; - - fn size(&self) -> usize { - self.size() - } - - fn value(&self, row_id: usize) -> Self::Item { - self.value(row_id) - } - - fn values(&self, row_ids: &[usize]) -> Vec { - self.values(row_ids) - } - - fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.encoded_values(row_ids) - } - - fn all_encoded_values(&self) -> Vec { - self.all_encoded_values() - } - - fn scan_from(&self, row_id: usize) -> &[Self::Item] { - self.scan_from(row_id) - } - - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { - self.sum_by_ids(row_ids) - } - - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item { - self.sum_by_id_range(from_row_id, to_row_id) - } - - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.count_by_id_range(from_row_id, to_row_id) - } - - fn row_ids_single_cmp_roaring( - &self, - wanted: &Self::Item, - order: std::cmp::Ordering, - ) -> croaring::Bitmap { - self.row_ids_single_cmp_roaring(wanted, order) - } - - fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { - self.row_ids_gte_lt_roaring(from, to) - } - - fn row_id_eq_value(&self, v: Self::Item) -> Option { - self.row_id_eq_value(v) - } -} - -impl NumericEncoding for PlainFixed { - type Item = i64; - - fn size(&self) -> usize { - self.size() - } - - fn value(&self, row_id: usize) -> Self::Item { - self.value(row_id) - } - - fn values(&self, row_ids: &[usize]) -> Vec { - self.values(row_ids) - } - - fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.encoded_values(row_ids) - } - - fn all_encoded_values(&self) -> Vec { - self.all_encoded_values() - } - - fn scan_from(&self, row_id: usize) -> &[Self::Item] { - self.scan_from(row_id) - } - - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item { - self.sum_by_ids(row_ids) - } - - fn sum_by_id_range(&self, from_row_id: usize, 
to_row_id: usize) -> Self::Item { - self.sum_by_id_range(from_row_id, to_row_id) - } - - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.count_by_id_range(from_row_id, to_row_id) - } - - fn row_ids_single_cmp_roaring( - &self, - wanted: &Self::Item, - order: std::cmp::Ordering, - ) -> croaring::Bitmap { - self.row_ids_single_cmp_roaring(wanted, order) - } - - fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap { - self.row_ids_gte_lt_roaring(from, to) - } - - fn row_id_eq_value(&self, v: Self::Item) -> Option { - self.row_id_eq_value(v) - } -} - #[derive(Debug, Default)] pub struct DictionaryRLE { // stores the mapping between an entry and its assigned index. @@ -602,7 +493,6 @@ pub struct DictionaryRLE { // stores tuples where each pair refers to a dictionary entry and the number // of times the entry repeats. run_lengths: Vec<(usize, u64)>, - run_length_size: usize, total: u64, } @@ -615,7 +505,6 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), - run_length_size: 0, total: 0, } } @@ -627,7 +516,6 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), - run_length_size: 0, total: 0, }; @@ -663,7 +551,6 @@ impl DictionaryRLE { } else { // start a new run-length self.run_lengths.push((*idx, additional)); - self.run_length_size += std::mem::size_of::<(usize, u64)>(); } self.index_row_ids .get_mut(&(*idx as u32)) @@ -690,7 +577,6 @@ impl DictionaryRLE { .get_mut(&(idx as u32)) .unwrap() .add_range(self.total..self.total + additional); - self.run_length_size += std::mem::size_of::<(usize, u64)>(); } } } @@ -927,8 +813,28 @@ impl DictionaryRLE { } pub fn size(&self) -> usize { - // mapping and reverse mapping then the rles - 2 * self.map_size + self.run_length_size + // entry_index: BTreeMap, usize>, + + // // stores the mapping between an index and its entry. + // index_entry: BTreeMap>, + + (self.index_entry.len() * size_of::>>()) + + (self.index_row_ids.len() * size_of::>()) + + size_of::() + + (self.run_lengths.len() * size_of::>()) + + size_of::() + } +} + +impl std::fmt::Display for DictionaryRLE { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[DictionaryRLE] size: {}, dict entries: {}, runs: {} ", + self.size(), + self.index_entry.len(), + self.run_lengths.len() + ) } } @@ -959,6 +865,7 @@ impl std::convert::From<&delorean_table::Packer> for #[cfg(test)] mod test { + use super::NumericEncoding; #[test] fn plain_arrow() { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 9a15b03430..f4315c8234 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -934,6 +934,21 @@ impl Segment { } } +impl std::fmt::Display for Segment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!( + f, + "Rows: {}\nSize: {} Columns: ", + self.num_rows(), + self.size() + )?; + for (c, name) in self.columns.iter().zip(self.column_names().iter()) { + writeln!(f, "{} {}", name, c)?; + } + Ok(()) + } +} + /// Meta data for a segment. This data is mainly used to determine if a segment /// may contain a value that can answer a query. 
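/// For example, a query with the predicate `time >= 100 AND time < 200` can
/// skip a segment entirely when the segment's time range metadata lies
/// wholly outside `[100, 200)`, without touching any of its column data.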
#[derive(Debug)] From 48623d6f77dd8ae6e3f40698af291b6a9d11f0e0 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Sep 2020 17:38:38 +0100 Subject: [PATCH 54/73] refactor: enable broken code --- delorean_mem_qe/src/segment.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f4315c8234..9255817eaa 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -424,6 +424,7 @@ impl Segment { // println!("groups: {:?}", hash_table.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); BTreeMap::new() + // hash_table } From d3e819b3bd61f20516dbbdebb6a7ed73a0a7c17a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 14 Sep 2020 11:22:13 +0100 Subject: [PATCH 55/73] refactor: get column compiling --- delorean_mem_qe/src/column.rs | 923 +++++++++++++++++++------------- delorean_mem_qe/src/encoding.rs | 141 ++--- 2 files changed, 614 insertions(+), 450 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index e0b5df55dc..4faffdc4da 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -100,7 +100,7 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { #[derive(Clone, Debug)] pub enum Aggregate<'a> { Count(u64), - Sum(Scalar<'a>), + Sum(Option>), } #[derive(Debug, Clone)] @@ -109,29 +109,16 @@ pub enum AggregateType { Sum, } -impl<'a> Aggregate<'a> { - pub fn update_with(&mut self, other: Scalar<'a>) { - match self { - Self::Count(v) => { - *v = *v + 1; - } - Self::Sum(v) => { - v.add(other); - } - } - } -} +// impl<'a> std::ops::Add> for Aggregate<'a> { +// type Output = Aggregate<'a>; -impl<'a> std::ops::Add> for Aggregate<'a> { - type Output = Aggregate<'a>; - - fn add(self, _rhs: Scalar<'a>) -> Self::Output { - match self { - Self::Count(c) => Self::Count(c + 1), - Self::Sum(s) => Self::Sum(s + &_rhs), - } - } -} +// fn add(self, _rhs: Scalar<'a>) -> Self::Output { +// match self { +// Self::Count(c) => Self::Count(c + 1), +// Self::Sum(s) => Self::Sum(s + &_rhs), +// } +// } +// } impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { type Output = Aggregate<'a>; @@ -147,7 +134,12 @@ impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { } Self::Sum(s) => { if let Self::Sum(other) = _rhs { - Self::Sum(s + other) + match (s, other) { + (None, None) => Self::Sum(None), + (None, Some(other)) => Self::Sum(Some(*other)), + (Some(s), None) => Self::Sum(Some(s)), + (Some(s), Some(other)) => Self::Sum(Some(s + other)), + } } else { panic!("invalid"); } @@ -167,48 +159,49 @@ pub trait AggregatableByRange { /// A Vector is a materialised vector of values from a column. 
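/// For example, materialising rows 0 and 2 of a nullable float column
/// holding [Some(1.0), None, Some(3.0)] produces
/// `Vector::Float(vec![Some(1.0), Some(3.0)])`.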
pub enum Vector<'a> { String(Vec<&'a Option>), - Float(Vec), - Integer(Vec), + EncodedString(Vec), + Float(Vec>), + Integer(Vec>), } impl<'a> Vector<'a> { - pub fn aggregate_by_id_range( - &self, - agg_type: &AggregateType, - from_row_id: usize, - to_row_id: usize, - ) -> Aggregate<'a> { - match agg_type { - AggregateType::Count => { - Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) - } - AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), - } - } + // pub fn aggregate_by_id_range( + // &self, + // agg_type: &AggregateType, + // from_row_id: usize, + // to_row_id: usize, + // ) -> Aggregate<'a> { + // match agg_type { + // AggregateType::Count => { + // Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + // } + // AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), + // } + // } - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { - match self { - Vector::String(_) => { - panic!("can't sum strings...."); - } - Vector::Float(values) => { - let mut res = 0.0; - // TODO(edd): check asm to see if it's vectorising - for v in values[from_row_id..to_row_id].iter() { - res += *v; - } - Scalar::Float(res) - } - Vector::Integer(values) => { - let mut res = 0; - // TODO(edd): check asm to see if it's vectorising - for v in values[from_row_id..to_row_id].iter() { - res += *v; - } - Scalar::Integer(res) - } - } - } + // fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { + // match self { + // Vector::String(_) => { + // panic!("can't sum strings...."); + // } + // Vector::Float(values) => { + // let mut res = 0.0; + // // TODO(edd): check asm to see if it's vectorising + // for v in values[from_row_id..to_row_id].iter() { + // res += *v; + // } + // Scalar::Float(res) + // } + // Vector::Integer(values) => { + // let mut res = 0; + // // TODO(edd): check asm to see if it's vectorising + // for v in values[from_row_id..to_row_id].iter() { + // res += *v; + // } + // Scalar::Integer(res) + // } + // } + // } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { to_row_id - from_row_id @@ -335,8 +328,8 @@ impl<'a> std::fmt::Display for Vector<'a> { #[derive(Debug)] pub enum Column { String(String), - Float(Float), - Integer(Integer), + Float(NumericColumn), + Integer(NumericColumn), } impl Column { @@ -376,13 +369,23 @@ impl Column { if row_id >= self.num_rows() { return None; } - Some(Scalar::Float(c.value(row_id))) + + let v = c.value(row_id); + if let Some(v) = v { + return Some(Scalar::Float(v)); + } + None } Column::Integer(c) => { if row_id >= self.num_rows() { return None; } - Some(Scalar::Integer(c.value(row_id))) + + let v = c.value(row_id); + if let Some(v) = v { + return Some(Scalar::Integer(v)); + } + None } } } @@ -485,7 +488,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids_vec); log::debug!("time getting encoded values {:?}", now.elapsed()); - Vector::Integer(v) + Vector::EncodedString(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -518,7 +521,7 @@ impl Column { log::debug!("time getting encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::Integer(v) + Vector::EncodedString(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -546,7 +549,7 @@ impl Column { log::debug!("time getting all encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::Integer(v) + 
Vector::EncodedString(v) } Column::Float(c) => Vector::Float(c.all_encoded_values()), Column::Integer(c) => Vector::Integer(c.all_encoded_values()), @@ -596,28 +599,25 @@ impl Column { } } - pub fn maybe_contains(&self, value: Option<&Scalar<'_>>) -> bool { + pub fn maybe_contains(&self, value: &Scalar<'_>) -> bool { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - c.meta.maybe_contains_value(Some(v.to_string())) - } else { - panic!("invalid value"); - } + Column::String(c) => { + if let Scalar::String(v) = value { + c.meta.maybe_contains_value(v.to_string()) + } else { + panic!("invalid value"); } - None => c.meta.maybe_contains_value(None), - }, + } Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { - c.meta.maybe_contains_value(v.to_owned()) + if let Scalar::Float(v) = value { + c.meta.maybe_contains_value(*v) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { - c.meta.maybe_contains_value(v.to_owned()) + if let Scalar::Integer(v) = value { + c.meta.maybe_contains_value(*v) } else { panic!("invalid value or unsupported null"); } @@ -626,76 +626,98 @@ impl Column { } /// returns true if the column cannot contain - pub fn max_less_than(&self, value: Option<&Scalar<'_>>) -> bool { + pub fn max_less_than(&self, value: &Scalar<'_>) -> bool { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - c.meta.range().1 < Some(&v.to_string()) + Column::String(c) => { + if let Scalar::String(v) = value { + if let Some(range) = c.meta.range() { + range.1 < v.to_string() } else { - panic!("invalid value"); + false } - } - None => c.meta.range().1 < None, - }, - Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { - c.meta.range().1 < *v } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); + } + } + Column::Float(c) => { + if let Scalar::Float(v) = value { + if let Some(range) = c.meta.range() { + range.1 < *v + } else { + false + } + } else { + panic!("invalid value"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { - c.meta.range().1 < *v + if let Scalar::Integer(v) = value { + if let Some(range) = c.meta.range() { + range.1 < *v + } else { + false + } } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); } } } } - pub fn min_greater_than(&self, value: Option<&Scalar<'_>>) -> bool { + // TODO(edd): consolodate with max_less_than... 
Should just be single cmp function + pub fn min_greater_than(&self, value: &Scalar<'_>) -> bool { match self { - Column::String(c) => match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - c.meta.range().0 > Some(&v.to_string()) + Column::String(c) => { + if let Scalar::String(v) = value { + if let Some(range) = c.meta.range() { + range.0 > v.to_string() } else { - panic!("invalid value"); + false } - } - None => c.meta.range().0 > None, - }, - Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { - c.meta.range().0 > *v } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); + } + } + Column::Float(c) => { + if let Scalar::Float(v) = value { + if let Some(range) = c.meta.range() { + range.0 > *v + } else { + false + } + } else { + panic!("invalid value"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { - c.meta.range().0 > *v + if let Scalar::Integer(v) = value { + if let Some(range) = c.meta.range() { + range.0 > *v + } else { + false + } } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); } } } } /// Returns the minimum value contained within this column. - // FIXME(edd): Support NULL integers and floats pub fn min(&self) -> Option> { match self { - Column::String(c) => { - if let Some(min) = c.meta.range().0 { - return Some(Scalar::String(min)); - } - None - } - Column::Float(c) => Some(Scalar::Float(c.meta.range().0)), - Column::Integer(c) => Some(Scalar::Integer(c.meta.range().0)), + Column::String(c) => match c.meta.range() { + Some(range) => Some(Scalar::String(&range.0)), + None => None, + }, + Column::Float(c) => match c.meta.range() { + Some(range) => Some(Scalar::Float(range.0)), + None => None, + }, + Column::Integer(c) => match c.meta.range() { + Some(range) => Some(Scalar::Integer(range.0)), + None => None, + }, } } @@ -703,21 +725,28 @@ impl Column { // FIXME(edd): Support NULL integers and floats pub fn max(&self) -> Option> { match self { - Column::String(c) => { - if let Some(max) = c.meta.range().1 { - return Some(Scalar::String(max)); - } - None - } - Column::Float(c) => Some(Scalar::Float(c.meta.range().1)), - Column::Integer(c) => Some(Scalar::Integer(c.meta.range().1)), + Column::String(c) => match c.meta.range() { + Some(range) => Some(Scalar::String(&range.1)), + None => None, + }, + Column::Float(c) => match c.meta.range() { + Some(range) => Some(Scalar::Float(range.1)), + None => None, + }, + Column::Integer(c) => match c.meta.range() { + Some(range) => Some(Scalar::Integer(range.1)), + None => None, + }, } } pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option> { match self { Column::String(_) => unimplemented!("not implemented"), - Column::Float(c) => Some(Scalar::Float(c.sum_by_ids(row_ids))), + Column::Float(c) => match c.sum_by_ids(row_ids) { + Some(sum) => Some(Scalar::Float(sum)), + None => None, + }, Column::Integer(_) => unimplemented!("not implemented"), } } @@ -734,9 +763,10 @@ impl Column { AggregateType::Count => { Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id) as u64) } - AggregateType::Sum => { - Aggregate::Sum(Scalar::Float(c.sum_by_id_range(from_row_id, to_row_id))) - } + AggregateType::Sum => match c.sum_by_id_range(from_row_id, to_row_id) { + Some(sum) => Aggregate::Sum(Some(Scalar::Float(sum))), + None => Aggregate::Sum(None), + }, }, Column::Integer(_) => unimplemented!("not implemented"), @@ -753,20 +783,25 @@ impl Column { // TODO(edd) shouldn't let roaring stuff leak out... 
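    // For example, on a float column whose metadata range is (1.0, 5.0),
    // `row_ids_eq` for the value 9.5 returns `None` immediately via
    // `maybe_contains`, and a column whose `range()` is `None` (all rows
    // NULL) can never match, so no scan happens in either case.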
pub fn row_ids_eq(&self, value: Option<&Scalar<'_>>) -> Option { + let value = match value { + Some(v) => v, + None => return None, + }; + if !self.maybe_contains(value) { return None; } self.row_ids(value, std::cmp::Ordering::Equal) } - pub fn row_ids_gt(&self, value: Option<&Scalar<'_>>) -> Option { + pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { if self.max_less_than(value) { return None; } self.row_ids(value, std::cmp::Ordering::Greater) } - pub fn row_ids_lt(&self, value: Option<&Scalar<'_>>) -> Option { + pub fn row_ids_lt(&self, value: &Scalar<'_>) -> Option { if self.min_greater_than(value) { return None; } @@ -785,9 +820,14 @@ impl Column { unimplemented!("not implemented yet"); } Column::Float(c) => { - let (col_min, col_max) = c.meta.range(); + let (col_min, col_max) = match c.meta.range() { + Some(range) => range, + // no min/max on column which means must be all NULL values. + None => return None, + }; + if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { - if *low <= col_min && *high > col_max { + if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" @@ -808,9 +848,14 @@ impl Column { } } Column::Integer(c) => { - let (col_min, col_max) = c.meta.range(); + let (col_min, col_max) = match c.meta.range() { + Some(range) => range, + // no min/max on column which means must be all NULL values. + None => return None, + }; + if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { - if *low <= col_min && *high > col_max { + if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than // return a full one. Need to differentiate between "no values" @@ -834,36 +879,28 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... 
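    // Worked example for the covering fast path above: a column with a
    // min/max range of (10, 20) is completely covered by the predicate
    // `5 <= v < 25` (5 <= 10 and 25 > 20), so every row id qualifies
    // without a scan, whereas `15 <= v < 18` must fall through to the
    // encoding's `row_ids_gte_lt_roaring` scan.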
- fn row_ids( - &self, - value: Option<&Scalar<'_>>, - order: std::cmp::Ordering, - ) -> Option { + fn row_ids(&self, value: &Scalar<'_>, order: std::cmp::Ordering) -> Option { match self { Column::String(c) => { if order != std::cmp::Ordering::Equal { unimplemented!("> < not supported on strings yet"); } - match value { - Some(scalar) => { - if let Scalar::String(v) = scalar { - Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) - } else { - panic!("invalid value"); - } - } - None => Some(c.data.row_ids_eq_roaring(None)), + + if let Scalar::String(v) = value { + Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) + } else { + panic!("invalid value"); } } Column::Float(c) => { - if let Some(Scalar::Float(v)) = value { + if let Scalar::Float(v) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Some(Scalar::Integer(v)) = value { + if let Scalar::Integer(v) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); @@ -901,21 +938,21 @@ impl AggregatableByRange for &Column { } } -impl From<&[f64]> for Column { - fn from(values: &[f64]) -> Self { - Self::Float(Float::from(values)) - } -} +// impl From<&[f64]> for Column { +// fn from(values: &[f64]) -> Self { +// Self::Float(Float::from(values)) +// } +// } -impl From<&[i64]> for Column { - fn from(values: &[i64]) -> Self { - Self::Integer(Integer::from(values)) - } -} +// impl From<&[i64]> for Column { +// fn from(values: &[i64]) -> Self { +// Self::Integer(Integer::from(values)) +// } +// } #[derive(Debug, Default)] pub struct String { - meta: metadata::Str, + meta: metadata::Metadata, // TODO(edd): this would probably have multiple possible encodings data: encoding::DictionaryRLE, @@ -940,7 +977,7 @@ impl String { self.data.push_additional(s, additional); } - pub fn column_range(&self) -> (Option<&std::string::String>, Option<&std::string::String>) { + pub fn column_range(&self) -> &Option<(std::string::String, std::string::String)> { self.meta.range() } @@ -987,81 +1024,81 @@ impl std::fmt::Display for String { } } -#[derive(Debug)] -pub struct Float { - meta: metadata::F64, +// #[derive(Debug)] +// pub struct Float { +// meta: metadata::F64, - // TODO(edd): compression of float columns - // data: encoding::PlainFixed, - data: Box>, -} +// // TODO(edd): compression of float columns +// // data: encoding::PlainFixed, +// data: Box>, +// } -impl Float { - pub fn column_range(&self) -> (f64, f64) { - self.meta.range() - } +// impl Float { +// pub fn column_range(&self) -> (f64, f64) { +// self.meta.range() +// } - pub fn size(&self) -> usize { - self.meta.size() + self.data.size() - } +// pub fn size(&self) -> usize { +// self.meta.size() + self.data.size() +// } - pub fn value(&self, row_id: usize) -> f64 { - self.data.value(row_id) - } +// pub fn value(&self, row_id: usize) -> f64 { +// self.data.value(row_id) +// } - pub fn values(&self, row_ids: &[usize]) -> Vec { - self.data.values(row_ids) - } +// pub fn values(&self, row_ids: &[usize]) -> Vec { +// self.data.values(row_ids) +// } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { - self.data.encoded_values(row_ids) - } +// pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { +// self.data.encoded_values(row_ids) +// } - pub fn all_encoded_values(&self) -> Vec { - self.data.all_encoded_values() - } +// pub fn all_encoded_values(&self) -> Vec { +// self.data.all_encoded_values() +// } - pub fn scan_from(&self, row_id: 
usize) -> &[f64] { - self.data.scan_from(row_id) - } +// pub fn scan_from(&self, row_id: usize) -> &[f64] { +// self.data.scan_from(row_id) +// } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { - self.data.sum_by_ids(row_ids) - } +// pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> f64 { +// self.data.sum_by_ids(row_ids) +// } - pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> f64 { - self.data.sum_by_id_range(from_row_id, to_row_id) - } +// pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> f64 { +// self.data.sum_by_id_range(from_row_id, to_row_id) +// } - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - self.data.count_by_id_range(from_row_id, to_row_id) - } -} +// pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { +// self.data.count_by_id_range(from_row_id, to_row_id) +// } +// } -impl std::fmt::Display for Float { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Meta: {}, Data: {}", self.meta, self.data) - } -} +// impl std::fmt::Display for Float { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// write!(f, "Meta: {}, Data: {}", self.meta, self.data) +// } +// } -impl From<&[f64]> for Float { - fn from(values: &[f64]) -> Self { - let len = values.len(); - let mut min = std::f64::MAX; - let mut max = std::f64::MIN; +// impl From<&[f64]> for Float { +// fn from(values: &[f64]) -> Self { +// let len = values.len(); +// let mut min = std::f64::MAX; +// let mut max = std::f64::MIN; - // calculate min/max for meta data - for v in values { - min = min.min(*v); - max = max.max(*v); - } +// // calculate min/max for meta data +// for v in values { +// min = min.min(*v); +// max = max.max(*v); +// } - Self { - meta: metadata::F64::new((min, max), len), - data: Box::new(encoding::PlainFixed::from(values)), - } - } -} +// Self { +// meta: metadata::F64::new((min, max), len), +// data: Box::new(encoding::PlainFixed::from(values)), +// } +// } +// } // use arrow::array::Array; // impl From> for Float { @@ -1089,16 +1126,93 @@ impl From<&[f64]> for Float { // } // } +// #[derive(Debug)] +// pub struct Integer { +// meta: metadata::Metadata, + +// // TODO(edd): compression of integers +// data: Box>, +// } + +// impl Integer { +// pub fn column_range(&self) -> (Option<&i64>, Option<&i64>) { +// self.meta.range() +// } + +// pub fn size(&self) -> usize { +// self.meta.size() + self.data.size() +// } + +// pub fn value(&self, row_id: usize) -> i64 { +// self.data.value(row_id) +// } + +// pub fn values(&self, row_ids: &[usize]) -> Vec { +// self.data.values(row_ids) +// } + +// pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { +// self.data.encoded_values(row_ids) +// } + +// pub fn all_encoded_values(&self) -> Vec { +// self.data.all_encoded_values() +// } + +// pub fn scan_from(&self, row_id: usize) -> &[i64] { +// self.data.scan_from(row_id) +// } + +// /// Find the first logical row that contains this value. 
+// pub fn row_id_eq_value(&self, v: i64) -> Option { +// if !self.meta.maybe_contains_value(v) { +// return None; +// } +// self.data.row_id_eq_value(v) +// } +// } + +// impl std::fmt::Display for Integer { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// write!(f, "Meta: {}, Data: {}", self.meta, self.data) +// } +// } + +// impl From<&[i64]> for Integer { +// fn from(values: &[i64]) -> Self { +// let len = values.len(); +// let mut min = std::i64::MAX; +// let mut max = std::i64::MIN; + +// // calculate min/max for meta data +// for v in values { +// min = min.min(*v); +// max = max.max(*v); +// } + +// Self { +// meta: metadata::Metadata::new((Some(min), Some(max)), len), +// data: Box::new(encoding::PlainFixed::from(values)), +// } +// } +// } + #[derive(Debug)] -pub struct Integer { - meta: metadata::I64, +pub struct NumericColumn +where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, +{ + meta: metadata::Metadata, // TODO(edd): compression of integers - data: Box>, + data: Box>, } -impl Integer { - pub fn column_range(&self) -> (i64, i64) { +impl NumericColumn +where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, +{ + pub fn column_range(&self) -> &Option<(T, T)> { self.meta.range() } @@ -1106,57 +1220,53 @@ impl Integer { self.meta.size() + self.data.size() } - pub fn value(&self, row_id: usize) -> i64 { + pub fn value(&self, row_id: usize) -> Option { self.data.value(row_id) } - pub fn values(&self, row_ids: &[usize]) -> Vec { + pub fn values(&self, row_ids: &[usize]) -> Vec> { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { self.data.encoded_values(row_ids) } - pub fn all_encoded_values(&self) -> Vec { + pub fn all_encoded_values(&self) -> Vec> { self.data.all_encoded_values() } - pub fn scan_from(&self, row_id: usize) -> &[i64] { + pub fn scan_from(&self, row_id: usize) -> &[Option] { self.data.scan_from(row_id) } /// Find the first logical row that contains this value. 
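    /// The column's metadata range is consulted first, so probing for a
    /// value outside the min/max (for example 99 against a range of
    /// (1, 10)) returns `None` without scanning the underlying encoding.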
- pub fn row_id_eq_value(&self, v: i64) -> Option { + pub fn row_id_eq_value(&self, v: T) -> Option { if !self.meta.maybe_contains_value(v) { return None; } self.data.row_id_eq_value(v) } -} -impl std::fmt::Display for Integer { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Meta: {}, Data: {}", self.meta, self.data) + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { + self.data.sum_by_ids(row_ids) + } + + pub fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { + self.data.sum_by_id_range(from_row_id, to_row_id) + } + + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + self.data.count_by_id_range(from_row_id, to_row_id) } } -impl From<&[i64]> for Integer { - fn from(values: &[i64]) -> Self { - let len = values.len(); - let mut min = std::i64::MAX; - let mut max = std::i64::MIN; - - // calculate min/max for meta data - for v in values { - min = min.min(*v); - max = max.max(*v); - } - - Self { - meta: metadata::I64::new((min, max), len), - data: Box::new(encoding::PlainFixed::from(values)), - } +impl std::fmt::Display for NumericColumn +where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Meta: {}, Data: {}", self.meta, self.data) } } @@ -1164,33 +1274,55 @@ pub mod metadata { use std::mem::size_of; #[derive(Debug, Default)] - pub struct Str { - range: (Option, Option), + pub struct Metadata + where + T: Clone + std::fmt::Debug, + { + range: Option<(T, T)>, num_rows: usize, } - impl Str { - pub fn add(&mut self, s: Option) { - self.num_rows += 1; - - if s < self.range.0 { - self.range.0 = s.clone(); - } - - if s > self.range.1 { - self.range.1 = s; + impl Metadata + where + T: Clone + std::cmp::PartialOrd + std::fmt::Debug, + { + pub fn new(range: Option<(T, T)>, rows: usize) -> Self { + Self { + range, + num_rows: rows, } } - pub fn add_repeated(&mut self, s: Option, additional: usize) { + fn update_range(&mut self, v: T) { + match self.range { + Some(range) => { + if v < range.0 { + range.0 = v; + } + + if v > range.1 { + range.1 = v; + } + } + None => { + self.range = Some((v, v)); + } + } + } + + pub fn add(&mut self, v: Option) { + self.num_rows += 1; + + if let Some(v) = v { + self.update_range(v); + } + } + + pub fn add_repeated(&mut self, v: Option, additional: usize) { self.num_rows += additional; - if s < self.range.0 { - self.range.0 = s.clone(); - } - - if s > self.range.1 { - self.range.1 = s; + if let Some(v) = v { + self.update_range(v); } } @@ -1198,114 +1330,183 @@ pub mod metadata { self.num_rows } - pub fn maybe_contains_value(&self, v: Option) -> bool { - self.range.0 <= v && v <= self.range.1 + pub fn maybe_contains_value(&self, v: T) -> bool { + match self.range { + Some(range) => range.0 <= v && v <= range.1, + None => false, + } } - pub fn range(&self) -> (Option<&String>, Option<&String>) { - (self.range.0.as_ref(), self.range.1.as_ref()) + pub fn range(&self) -> &Option<(T, T)> { + &self.range } pub fn size(&self) -> usize { // size of types for num_rows and range let base_size = size_of::() + (2 * size_of::>()); - match &self.range { - (None, None) => base_size, - (Some(min), None) => base_size + min.len(), - (None, Some(max)) => base_size + max.len(), - (Some(min), Some(max)) => base_size + min.len() + max.len(), - } + + // + // TODO: figure out a way to specify that T must be able to describe its runtime size. 
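        // One possible shape for this (a sketch, not something this code
        // defines): a small trait that types used in the range can
        // implement, e.g.
        //
        //     trait RuntimeSize {
        //         fn runtime_size(&self) -> usize;
        //     }
        //
        //     impl RuntimeSize for String {
        //         fn runtime_size(&self) -> usize {
        //             std::mem::size_of::<Self>() + self.len()
        //         }
        //     }
        //
        // With a `T: RuntimeSize` bound the match below could then add
        // `min.runtime_size() + max.runtime_size()` to `base_size` for
        // variable-width types.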
+ // + // match &self.range { + // (None, None) => base_size, + // (Some(min), None) => base_size + min.len(), + // (None, Some(max)) => base_size + max.len(), + // (Some(min), Some(max)) => base_size + min.len() + max.len(), + // } + base_size } } - impl std::fmt::Display for Str { + impl std::fmt::Display for Metadata { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Range: ({:?})", self.range) } } - #[derive(Debug, Default)] - pub struct F64 { - range: (f64, f64), - num_rows: usize, - } + // #[derive(Debug, Default)] + // pub struct Str { + // range: (Option, Option), + // num_rows: usize, + // } - impl F64 { - pub fn new(range: (f64, f64), rows: usize) -> Self { - Self { - range, - num_rows: rows, - } - } + // impl Str { + // pub fn add(&mut self, s: Option) { + // self.num_rows += 1; - pub fn maybe_contains_value(&self, v: f64) -> bool { - let res = self.range.0 <= v && v <= self.range.1; - log::debug!( - "column with ({:?}) maybe contain {:?} -- {:?}", - self.range, - v, - res - ); - res - } + // if s < self.range.0 { + // self.range.0 = s.clone(); + // } - pub fn num_rows(&self) -> usize { - self.num_rows - } + // if s > self.range.1 { + // self.range.1 = s; + // } + // } - pub fn range(&self) -> (f64, f64) { - self.range - } + // pub fn add_repeated(&mut self, s: Option, additional: usize) { + // self.num_rows += additional; - pub fn size(&self) -> usize { - size_of::() + (size_of::<(f64, f64)>()) - } - } + // if s < self.range.0 { + // self.range.0 = s.clone(); + // } - impl std::fmt::Display for F64 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Range: ({:?})", self.range) - } - } + // if s > self.range.1 { + // self.range.1 = s; + // } + // } - #[derive(Debug, Default)] - pub struct I64 { - range: (i64, i64), - num_rows: usize, - } + // pub fn num_rows(&self) -> usize { + // self.num_rows + // } - impl I64 { - pub fn new(range: (i64, i64), rows: usize) -> Self { - Self { - range, - num_rows: rows, - } - } + // pub fn maybe_contains_value(&self, v: Option) -> bool { + // self.range.0 <= v && v <= self.range.1 + // } - pub fn maybe_contains_value(&self, v: i64) -> bool { - self.range.0 <= v && v <= self.range.1 - } + // pub fn range(&self) -> (Option<&String>, Option<&String>) { + // (self.range.0.as_ref(), self.range.1.as_ref()) + // } - pub fn max(&self) -> i64 { - self.range.1 - } + // pub fn size(&self) -> usize { + // // size of types for num_rows and range + // let base_size = size_of::() + (2 * size_of::>()); + // match &self.range { + // (None, None) => base_size, + // (Some(min), None) => base_size + min.len(), + // (None, Some(max)) => base_size + max.len(), + // (Some(min), Some(max)) => base_size + min.len() + max.len(), + // } + // } + // } - pub fn num_rows(&self) -> usize { - self.num_rows - } + // impl std::fmt::Display for Str { + // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "Range: ({:?})", self.range) + // } + // } - pub fn range(&self) -> (i64, i64) { - self.range - } + // #[derive(Debug, Default)] + // pub struct F64 { + // range: (f64, f64), + // num_rows: usize, + // } - pub fn size(&self) -> usize { - size_of::() + (size_of::<(i64, i64)>()) - } - } + // impl F64 { + // pub fn new(range: (f64, f64), rows: usize) -> Self { + // Self { + // range, + // num_rows: rows, + // } + // } - impl std::fmt::Display for I64 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Range: ({:?})", self.range) - } - } + // pub fn 
maybe_contains_value(&self, v: f64) -> bool { + // let res = self.range.0 <= v && v <= self.range.1; + // log::debug!( + // "column with ({:?}) maybe contain {:?} -- {:?}", + // self.range, + // v, + // res + // ); + // res + // } + + // pub fn num_rows(&self) -> usize { + // self.num_rows + // } + + // pub fn range(&self) -> (f64, f64) { + // self.range + // } + + // pub fn size(&self) -> usize { + // size_of::() + (size_of::<(f64, f64)>()) + // } + // } + + // impl std::fmt::Display for F64 { + // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "Range: ({:?})", self.range) + // } + // } + + // #[derive(Debug, Default)] + // pub struct I64 { + // range: (i64, i64), + // num_rows: usize, + // } + + // impl I64 { + // pub fn new(range: (i64, i64), rows: usize) -> Self { + // Self { + // range, + // num_rows: rows, + // } + // } + + // pub fn maybe_contains_value(&self, v: i64) -> bool { + // self.range.0 <= v && v <= self.range.1 + // } + + // pub fn max(&self) -> i64 { + // self.range.1 + // } + + // pub fn num_rows(&self) -> usize { + // self.num_rows + // } + + // pub fn range(&self) -> (i64, i64) { + // self.range + // } + + // pub fn size(&self) -> usize { + // size_of::() + (size_of::<(i64, i64)>()) + // } + // } + + // impl std::fmt::Display for I64 { + // fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // write!(f, "Range: ({:?})", self.range) + // } + // } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 54b90b0a72..8e10f8a17a 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -9,14 +9,14 @@ pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { type Item; fn size(&self) -> usize; - fn value(&self, row_id: usize) -> Self::Item; - fn values(&self, row_ids: &[usize]) -> Vec; - fn encoded_values(&self, row_ids: &[usize]) -> Vec; - fn all_encoded_values(&self) -> Vec; - fn scan_from(&self, row_id: usize) -> &[Self::Item]; + fn value(&self, row_id: usize) -> Option; + fn values(&self, row_ids: &[usize]) -> Vec>; + fn encoded_values(&self, row_ids: &[usize]) -> Vec>; + fn all_encoded_values(&self) -> Vec>; + fn scan_from(&self, row_id: usize) -> &[Option]; - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Self::Item; - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Self::Item; + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option; + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option; fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64; @@ -56,7 +56,7 @@ where + std::fmt::Debug + std::ops::Add, { - type Item = Option; + type Item = T::Native; fn size(&self) -> usize { self.arr.len() @@ -122,31 +122,41 @@ where // where you accept an array. let mut res = T::Native::default(); let vec = row_ids.to_vec(); + let mut non_null = false; for row_id in vec { let i = row_id as usize; if self.arr.is_null(i) { - return None; + continue; // skip NULL values } + non_null = true; res = res + self.arr.value(i); } - Some(res) + + // TODO: ghetto. + if non_null { + Some(res) + } else { + None + } } fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { - // if the column contains a null value between the range then the result - // will be None. 
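        // NULLs are skipped rather than nulling out the whole sum: for
        // example, summing [Some(1), None, Some(2)] over the full range
        // yields Some(3); only a range consisting entirely of NULLs
        // yields None.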
+ let mut res = T::Native::default(); + let mut non_null = false; + for i in from_row_id..to_row_id { if self.arr.is_null(i) { - return None; + continue; } - } - - // Otherwise sum all the values between in the range. - let mut res = T::Native::default(); - for i in from_row_id..to_row_id { + non_null = true; res = res + self.arr.value(i); } - Some(res) + + if non_null { + Some(res) + } else { + None + } } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { @@ -158,7 +168,7 @@ where } count += 1; } - count + count // if there are no non-null rows the result is 0 rather than NULL } fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { @@ -256,23 +266,23 @@ where } // get value at row_id. Panics if out of bounds. - fn value(&self, row_id: usize) -> T { - self.values[row_id] + fn value(&self, row_id: usize) -> Option { + Some(self.values[row_id]) } /// Return the decoded values for the provided logical row ids. - fn values(&self, row_ids: &[usize]) -> Vec { + fn values(&self, row_ids: &[usize]) -> Vec> { let mut out = Vec::with_capacity(row_ids.len()); for chunks in row_ids.chunks_exact(4) { - out.push(self.values[chunks[3]]); - out.push(self.values[chunks[2]]); - out.push(self.values[chunks[1]]); - out.push(self.values[chunks[0]]); + out.push(Some(self.values[chunks[3]])); + out.push(Some(self.values[chunks[2]])); + out.push(Some(self.values[chunks[1]])); + out.push(Some(self.values[chunks[0]])); } let rem = row_ids.len() % 4; for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { - out.push(self.values[i]); + out.push(Some(self.values[i])); } assert_eq!(out.len(), row_ids.len()); @@ -281,18 +291,19 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, row_ids: &[usize]) -> Vec> { self.values(row_ids) } /// Return all encoded values. For this encoding this is just the decoded /// values - fn all_encoded_values(&self) -> Vec { - self.values.clone() + fn all_encoded_values(&self) -> Vec> { + self.values.iter().map(|x| Some(*x)).collect::>() } - fn scan_from(&self, row_id: usize) -> &[T] { - &self.values[row_id..] + fn scan_from(&self, row_id: usize) -> &[Option] { + unimplemented!("this should probably take a destination vector or maybe a closure"); + // &self.values[row_id..] } /// returns a set of row ids that match a single ordering on a desired value @@ -371,12 +382,12 @@ where bm } - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> T { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { let mut res = T::default(); for v in self.values[from_row_id..to_row_id].iter() { res += *v; } - res + Some(res) } fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { @@ -384,64 +395,18 @@ where } // TODO(edd): make faster - fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> T { + fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { let mut res = T::default(); - // println!( - // "cardinality is {:?} out of {:?}", - // row_ids.cardinality(), - // self.values.len() - // ); - // HMMMMM - materialising which has a memory cost. - // let vec = row_ids.to_vec(); - // for v in vec.chunks_exact(4) { - // res += self.value(v[0] as usize); - // res += self.value(v[1] as usize); - // res += self.value(v[2] as usize); - // res += self.value(v[3] as usize); - // } - - // HMMMMM - materialising which has a memory cost. 
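        // An allocation-free alternative (a sketch; croaring's `Bitmap`
        // exposes `iter()` over u32 row ids) would be:
        //
        //     let mut res = T::default();
        //     for row_id in row_ids.iter() {
        //         res += self.values[row_id as usize];
        //     }
        //
        // which trades the Vec allocation for per-item iterator overhead.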
+ // Consider accepting a vec of ids if those ids need to be used again + // across other columns. let vec = row_ids.to_vec(); for v in vec { - res += self.value(v as usize); + // Todo(edd): this could benefit from unrolling (maybe) + res += self.values[v as usize]; } - // for v in row_ids.iter() { - // res += self.value(v as usize); - // } - - // let step = 16_u64; - // for i in (0..self.values.len() as u64).step_by(step as usize) { - // if row_ids.contains_range(i..i + step) { - // res += self.value(i as usize + 15); - // res += self.value(i as usize + 14); - // res += self.value(i as usize + 13); - // res += self.value(i as usize + 12); - // res += self.value(i as usize + 11); - // res += self.value(i as usize + 10); - // res += self.value(i as usize + 9); - // res += self.value(i as usize + 8); - // res += self.value(i as usize + 7); - // res += self.value(i as usize + 6); - // res += self.value(i as usize + 5); - // res += self.value(i as usize + 4); - // res += self.value(i as usize + 3); - // res += self.value(i as usize + 2); - // res += self.value(i as usize + 1); - // res += self.value(i as usize); - // continue; - // } - - // for j in i..i + step { - // if row_ids.contains(j as u32) { - // res += self.value(j as usize); - // } - // } - // } - - // row_ids.iter().for_each(|x| res += self.value(x as usize)); - res + Some(res) } fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { @@ -453,7 +418,6 @@ impl From<&[i64]> for PlainFixed { fn from(v: &[i64]) -> Self { Self { values: v.to_vec(), - // buf: Vec::with_capacity(v.len()), total_order: false, size: size_of::>() + (size_of::() * v.len()) @@ -467,7 +431,6 @@ impl From<&[f64]> for PlainFixed { fn from(v: &[f64]) -> Self { Self { values: v.to_vec(), - // buf: Vec::with_capacity(v.len()), total_order: false, size: size_of::>() + (size_of::() * v.len()) From 1968b654ccb0b9412248fb7c07807f51eb5bee7d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 14 Sep 2020 12:32:49 +0100 Subject: [PATCH 56/73] refactor: fix vector support --- delorean_mem_qe/src/column.rs | 168 +++++++++++++++++++++++++--------- 1 file changed, 123 insertions(+), 45 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 4faffdc4da..ccc2f053a2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -2,6 +2,13 @@ use std::convert::From; use super::encoding; +#[derive(Debug, PartialEq, PartialOrd, Clone)] +pub enum Value<'a> { + Null, + String(&'a str), + Scalar(Scalar<'a>), +} + #[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Scalar<'a> { String(&'a str), @@ -162,49 +169,94 @@ pub enum Vector<'a> { EncodedString(Vec), Float(Vec>), Integer(Vec>), + // TODO(edd): add types like this: + // + // Integer16(Vec), + // NullInteger16(Vec>), // contains one or more NULL values + // ... + // ... 
+ // + // We won't need EncodedString then (it can use one of the non-null integer variants) + // } impl<'a> Vector<'a> { - // pub fn aggregate_by_id_range( - // &self, - // agg_type: &AggregateType, - // from_row_id: usize, - // to_row_id: usize, - // ) -> Aggregate<'a> { - // match agg_type { - // AggregateType::Count => { - // Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) - // } - // AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), - // } - // } + pub fn aggregate_by_id_range( + &self, + agg_type: &AggregateType, + from_row_id: usize, + to_row_id: usize, + ) -> Aggregate<'a> { + match agg_type { + AggregateType::Count => { + Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + } + AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), + } + } - // fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Scalar<'a> { - // match self { - // Vector::String(_) => { - // panic!("can't sum strings...."); - // } - // Vector::Float(values) => { - // let mut res = 0.0; - // // TODO(edd): check asm to see if it's vectorising - // for v in values[from_row_id..to_row_id].iter() { - // res += *v; - // } - // Scalar::Float(res) - // } - // Vector::Integer(values) => { - // let mut res = 0; - // // TODO(edd): check asm to see if it's vectorising - // for v in values[from_row_id..to_row_id].iter() { - // res += *v; - // } - // Scalar::Integer(res) - // } - // } - // } + // Return the sum of values in the vector. NULL values are ignored. If there + // are no non-null values in the vector being summed then None is returned. + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option> { + match self { + Vector::String(_) => { + panic!("can't sum strings...."); + } + Vector::Float(values) => { + let mut res = 0.0; + let mut found = false; + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + if let Some(v) = v { + res += *v; + found = true; + } + } + + if found { + return Some(Scalar::Float(res)); + } + None + } + Vector::Integer(values) => { + let mut res = 0; + let mut found = false; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + if let Some(v) = v { + res += *v; + found = true; + } + } + + if found { + return Some(Scalar::Integer(res)); + } + None + } + Vector::EncodedString(values) => { + let mut res = 0; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Some(Scalar::Integer(res)) + } + } + } + + // return the count of values on the column. NULL values do not contribute + // to the count. 
fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - to_row_id - from_row_id + match self { + Vector::String(vec) => vec.iter().filter(|x| x.is_some()).count(), + Vector::EncodedString(_) => to_row_id - from_row_id, // fast - no possible NULL values + Vector::Float(vec) => vec.iter().filter(|x| x.is_some()).count(), + Vector::Integer(vec) => vec.iter().filter(|x| x.is_some()).count(), + } } pub fn extend(&mut self, other: Self) { @@ -230,6 +282,13 @@ impl<'a> Vector<'a> { unreachable!("string can't be extended"); } } + Vector::EncodedString(v) => { + if let Self::EncodedString(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } } } @@ -242,15 +301,27 @@ impl<'a> Vector<'a> { Self::String(v) => v.len(), Self::Float(v) => v.len(), Self::Integer(v) => v.len(), + Vector::EncodedString(v) => v.len(), } } - pub fn get(&self, i: usize) -> Scalar<'a> { + /// Return the value within the vector at position `i`. If the value at + /// position `i` is NULL then `None` is returned. + pub fn get(&self, i: usize) -> Value<'a> { match self { - // FIXME(edd): SORT THIS OPTION OUT - Self::String(v) => Scalar::String(v[i].as_ref().unwrap()), - Self::Float(v) => Scalar::Float(v[i]), - Self::Integer(v) => Scalar::Integer(v[i]), + Self::String(v) => match v[i] { + Some(v) => Value::String(v), + None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), + }, + Self::Float(v) => match v[i] { + Some(v) => Value::Scalar(Scalar::Float(v)), + None => Value::Null, + }, + Self::Integer(v) => match v[i] { + Some(v) => Value::Scalar(Scalar::Integer(v)), + None => Value::Null, + }, + Self::EncodedString(v) => Value::Scalar(Scalar::Integer(v[i])), } } @@ -265,6 +336,7 @@ impl<'a> Vector<'a> { Self::Integer(v) => { v.swap(a, b); } + Vector::EncodedString(v) => v.swap(a, b), } } } @@ -293,7 +365,7 @@ impl<'a> VectorIterator<'a> { } } impl<'a> Iterator for VectorIterator<'a> { - type Item = Scalar<'a>; + type Item = Value<'a>; fn next(&mut self) -> Option { let curr_i = self.next_i; @@ -316,11 +388,17 @@ impl<'a> std::fmt::Display for Vector<'a> { Self::Float(v) => write!(f, "{:?}", v), Self::Integer(v) => { for x in v.iter() { - let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); - write!(f, "{}, ", ts)?; + match x { + Some(x) => { + let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); + write!(f, "{}, ", ts)?; + } + None => write!(f, "NULL, ")?, + } } Ok(()) } + Vector::EncodedString(v) => write!(f, "{:?}", v), } } } From a107da6dfe8be6e630b71338895ecb7a2eac10a0 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 15 Sep 2020 13:21:44 +0100 Subject: [PATCH 57/73] refactor: temp add not null materialised vectors --- delorean_mem_qe/src/encoding.rs | 67 +++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 8e10f8a17a..1945ae3224 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -11,14 +11,16 @@ pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { fn size(&self) -> usize; fn value(&self, row_id: usize) -> Option; fn values(&self, row_ids: &[usize]) -> Vec>; - fn encoded_values(&self, row_ids: &[usize]) -> Vec>; - fn all_encoded_values(&self) -> Vec>; + + fn encoded_values(&self, row_ids: &[usize]) -> Vec; + fn all_encoded_values(&self) -> Vec; + fn scan_from(&self, row_id: usize) -> &[Option]; fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option; fn 
sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option; - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize; + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64; fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64; fn row_id_eq_value(&self, v: Self::Item) -> Option; @@ -82,20 +84,31 @@ where out } - /// Well this is terribly slow - fn encoded_values(&self, row_ids: &[usize]) -> Vec> { - self.values(row_ids) + /// encoded_values returns encoded values for the encoding. If the encoding + /// supports null values then the values returned are undefined. + /// + /// encoded_values should not be called on nullable columns. + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + // assertion here during development to check this isn't called on + // encodings that can have null values. + assert_eq!(self.arr.null_count(), 0); + + let mut out = Vec::with_capacity(row_ids.len()); + for &row_id in row_ids { + out.push(self.arr.value(row_id)); + } + assert_eq!(out.len(), row_ids.len()); + out } - /// TODO(edd): there must be a more efficient way. - fn all_encoded_values(&self) -> Vec> { + fn all_encoded_values(&self) -> Vec { + // assertion here during development to check this isn't called on + // encodings that can have null values. + assert_eq!(self.arr.null_count(), 0); + let mut out = Vec::with_capacity(self.arr.len()); for i in 0..self.arr.len() { - if self.arr.is_null(i) { - out.push(None) - } else { - out.push(Some(self.arr.value(i))) - } + out.push(self.arr.value(i)); } assert_eq!(out.len(), self.arr.len()); out @@ -159,7 +172,7 @@ where } } - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { // TODO - count values that are not null in the row range. let mut count = 0; for i in from_row_id..to_row_id { @@ -291,14 +304,28 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - fn encoded_values(&self, row_ids: &[usize]) -> Vec> { - self.values(row_ids) + fn encoded_values(&self, row_ids: &[usize]) -> Vec { + let mut out = Vec::with_capacity(row_ids.len()); + for chunks in row_ids.chunks_exact(4) { + out.push(self.values[chunks[3]]); + out.push(self.values[chunks[2]]); + out.push(self.values[chunks[1]]); + out.push(self.values[chunks[0]]); + } + + let rem = row_ids.len() % 4; + for &i in &row_ids[row_ids.len() - rem..row_ids.len()] { + out.push(self.values[i]); + } + + assert_eq!(out.len(), row_ids.len()); + out } /// Return all encoded values. For this encoding this is just the decoded /// values - fn all_encoded_values(&self) -> Vec> { - self.values.iter().map(|x| Some(*x)).collect::>() + fn all_encoded_values(&self) -> Vec { + self.values.clone() // TODO(edd):perf probably can return reference to vec. 
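        // For example, a borrowing signature such as
        // `fn all_encoded_values(&self) -> &[Self::Item]` would avoid the
        // copy here, at the cost of tying the result's lifetime to `&self`;
        // encodings that materialise values on the fly (like the
        // Arrow-backed one above) would then need somewhere to cache them.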
} fn scan_from(&self, row_id: usize) -> &[Option] { @@ -390,8 +417,8 @@ where Some(res) } - fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { - to_row_id - from_row_id + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { + (to_row_id - from_row_id) as u64 } // TODO(edd): make faster From ba39d731e0ac25cb05d410aa42136a8402e90a8b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Wed, 16 Sep 2020 16:59:00 +0100 Subject: [PATCH 58/73] refactor: get build working --- delorean_mem_qe/src/column.rs | 415 +++++++++++++++++++++++++------- delorean_mem_qe/src/encoding.rs | 42 +--- delorean_mem_qe/src/segment.rs | 118 +++++---- delorean_mem_qe/src/sorter.rs | 13 +- 4 files changed, 416 insertions(+), 172 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index ccc2f053a2..3b27743c2b 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -14,6 +14,7 @@ pub enum Scalar<'a> { String(&'a str), Float(f64), Integer(i64), + Unsigned32(u32), } impl<'a> Scalar<'a> { @@ -28,6 +29,9 @@ impl<'a> Scalar<'a> { Scalar::Integer(v) => { *v = 0; } + Scalar::Unsigned32(v) => { + *v = 0; + } } } @@ -47,6 +51,13 @@ impl<'a> Scalar<'a> { panic!("invalid"); }; } + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = other { + *v += other; + } else { + panic!("invalid"); + }; + } Self::String(_) => { unreachable!("not possible to add strings"); } @@ -54,6 +65,39 @@ impl<'a> Scalar<'a> { } } +impl<'a> std::ops::Add<&Scalar<'a>> for &mut Scalar<'a> { + type Output = Scalar<'a>; + + fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + match *self { + Scalar::Float(v) => { + if let Scalar::Float(other) = _rhs { + Scalar::Float(v + other) + } else { + panic!("invalid"); + } + } + Scalar::Integer(v) => { + if let Scalar::Integer(other) = _rhs { + Scalar::Integer(v + other) + } else { + panic!("invalid"); + } + } + Scalar::Unsigned32(v) => { + if let Scalar::Unsigned32(other) = _rhs { + Scalar::Unsigned32(v + other) + } else { + panic!("invalid"); + } + } + Scalar::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { type Output = Scalar<'a>; @@ -73,6 +117,13 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); } } + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = _rhs { + Self::Unsigned32(v + other) + } else { + panic!("invalid"); + } + } Self::String(_) => { unreachable!("not possible to add strings"); } @@ -80,6 +131,37 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { } } +impl<'a> std::ops::AddAssign<&Scalar<'a>> for &mut Scalar<'a> { + fn add_assign(&mut self, _rhs: &Scalar<'a>) { + match self { + Scalar::Float(v) => { + if let Scalar::Float(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Scalar::Integer(v) => { + if let Scalar::Integer(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Scalar::Unsigned32(v) => { + if let Scalar::Unsigned32(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } + Scalar::String(_) => { + unreachable!("not possible to add strings"); + } + } + } +} + impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { fn add_assign(&mut self, _rhs: &Scalar<'a>) { match self { @@ -97,6 +179,13 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); }; } + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = _rhs { + *v += *other; + } else { + panic!("invalid"); + }; + } 
Self::String(_) => {
                unreachable!("not possible to add strings");
            }
@@ -107,6 +196,8 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> {
 #[derive(Clone, Debug)]
 pub enum Aggregate<'a> {
     Count(u64),
+    // Sum can be `None` if, for example, all values being aggregated are themselves
+    // `None`.
     Sum(Option<Scalar<'a>>),
 }

@@ -116,44 +207,71 @@ pub enum AggregateType {
     Sum,
 }

-// impl<'a> std::ops::Add<Scalar<'a>> for Aggregate<'a> {
+// impl<'a> std::ops::Add<&Option<Scalar<'a>>> for Aggregate<'a> {
 //     type Output = Aggregate<'a>;

-//     fn add(self, _rhs: Scalar<'a>) -> Self::Output {
+//     fn add(self, _rhs: &Option<Scalar<'a>>) -> Self::Output {
 //         match self {
-//             Self::Count(c) => Self::Count(c + 1),
-//             Self::Sum(s) => Self::Sum(s + &_rhs),
+//             Self::Count(self_count) => match _rhs {
+//                 Some(other_scalar) => match other_scalar {
+//                     Scalar::String(_) => panic!("todo - remove String scalar"),
+//                     Scalar::Float(_) => panic!("cannot add floating point value to a count"),
+//                     Scalar::Integer(v) => Self::Count(self_count + *v as u64),
+//                     Scalar::Unsigned32(v) => Self::Count(self_count + *v as u64),
+//                 },
+//                 None => self,
+//             },
+//             // SUM ignores NULL values. Initially an aggregate sum is `None`, but
+//             // as soon as a non-null value is shown then it becomes `Some`.
+//             Self::Sum(self_sum) => match (self_sum, _rhs) {
+//                 (None, None) => Self::Sum(None),
+//                 (None, Some(other_scalar)) => match other_scalar {
+//                     Scalar::String(_) => panic!("todo - remove String scalar"),
+//                     Scalar::Float(_) => Self::Sum(Some(other_scalar.clone())),
+//                     Scalar::Integer(_) => Self::Sum(Some(other_scalar.clone())),
+//                     Scalar::Unsigned32(_) => Self::Sum(Some(other_scalar.clone())),
+//                 },
+//                 (Some(_self), None) => Self::Sum(Some(_self.clone())),
+//                 (Some(self_scalar), Some(other_scalar)) => match other_scalar {
+//                     Scalar::String(_) => panic!("todo - remove String scalar"),
+//                     Scalar::Float(_) => Self::Sum(Some(self_scalar + &other_scalar)),
+//                     Scalar::Integer(_) => Self::Sum(Some(self_scalar + &other_scalar)),
+//                     Scalar::Unsigned32(_) => Self::Sum(Some(self_scalar + &other_scalar)),
+//                 },
+//             },
 //     }
 // }

-impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> {
-    type Output = Aggregate<'a>;
+// impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> {
+//     type Output = Aggregate<'a>;

-    fn add(self, _rhs: &Aggregate<'a>) -> Self::Output {
-        match self {
-            Self::Count(c) => {
-                if let Self::Count(other) = _rhs {
-                    Self::Count(c + other)
-                } else {
-                    panic!("invalid");
-                }
-            }
-            Self::Sum(s) => {
-                if let Self::Sum(other) = _rhs {
-                    match (s, other) {
-                        (None, None) => Self::Sum(None),
-                        (None, Some(other)) => Self::Sum(Some(*other)),
-                        (Some(s), None) => Self::Sum(Some(s)),
-                        (Some(s), Some(other)) => Self::Sum(Some(s + other)),
-                    }
-                } else {
-                    panic!("invalid");
-                }
-            }
-        }
-    }
-}
+// fn add(self, _rhs: &Aggregate<'a>) -> Self::Output {
+//     match self {
+//         Self::Count(self_count) => {
+//             if let Self::Count(other) = _rhs {
+//                 Self::Count(self_count + *other)
+//             } else {
+//                 panic!("can't combine count with other aggregate type");
+//             }
+//         }
+//         // SUM ignores NULL values. Initially an aggregate sum is `None`, but
+//         // as soon as a non-null value is shown then it becomes `Some`.
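
// Aside: the NULL-handling rule described in the comment above, reduced to a
// runnable sketch over plain i64 values (the real code folds Scalar values):

    fn sum_ignoring_nulls(values: &[Option<i64>]) -> Option<i64> {
        values.iter().fold(None, |acc, v| match (acc, v) {
            (acc, None) => acc,                  // NULL contributes nothing
            (None, Some(v)) => Some(*v),         // first non-null value seeds the sum
            (Some(sum), Some(v)) => Some(sum + v),
        })
    }

    // sum_ignoring_nulls(&[None, None])             == None
    // sum_ignoring_nulls(&[None, Some(3), Some(4)]) == Some(7)
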
+// Self::Sum(self_sum) => { +// if let Self::Sum(other) = _rhs { +// match (self_sum, other) { +// (None, None) => Self::Sum(None), +// (None, Some(_)) => Self::Sum(*other), +// (Some(_), None) => self, +// (Some(s), Some(other)) => Self::Sum(Some(s + other)), +// } +// } else { +// panic!("invalid"); +// } +// } +// } +// } +// } pub trait AggregatableByRange { fn aggregate_by_id_range( @@ -163,12 +281,16 @@ pub trait AggregatableByRange { to_row_id: usize, ) -> Aggregate<'_>; } + /// A Vector is a materialised vector of values from a column. pub enum Vector<'a> { - String(Vec<&'a Option>), - EncodedString(Vec), - Float(Vec>), - Integer(Vec>), + NullString(Vec<&'a Option>), + NullFloat(Vec>), + NullInteger(Vec>), + + Float(Vec), + Integer(Vec), + Unsigned32(Vec), // TODO(edd): add types like this: // // Integer16(Vec), @@ -189,7 +311,7 @@ impl<'a> Vector<'a> { ) -> Aggregate<'a> { match agg_type { AggregateType::Count => { - Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id) as u64) + Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id)) } AggregateType::Sum => Aggregate::Sum(self.sum_by_id_range(from_row_id, to_row_id)), } @@ -199,12 +321,12 @@ impl<'a> Vector<'a> { // are no non-null values in the vector being summed then None is returned. fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option> { match self { - Vector::String(_) => { + Self::NullString(_) => { panic!("can't sum strings...."); } - Vector::Float(values) => { + Self::NullFloat(values) => { let mut res = 0.0; - let mut found = false; + let mut found = false; // TODO(edd): check if this is faster than a match. // TODO(edd): check asm to see if it's vectorising for v in values[from_row_id..to_row_id].iter() { @@ -219,7 +341,16 @@ impl<'a> Vector<'a> { } None } - Vector::Integer(values) => { + Self::Float(values) => { + let mut res = 0.0; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Some(Scalar::Float(res)) + } + Self::NullInteger(values) => { let mut res = 0; let mut found = false; @@ -236,7 +367,7 @@ impl<'a> Vector<'a> { } None } - Vector::EncodedString(values) => { + Self::Integer(values) => { let mut res = 0; // TODO(edd): check asm to see if it's vectorising @@ -245,24 +376,58 @@ impl<'a> Vector<'a> { } Some(Scalar::Integer(res)) } + Self::Unsigned32(values) => { + let mut res = 0; + + // TODO(edd): check asm to see if it's vectorising + for v in values[from_row_id..to_row_id].iter() { + res += *v; + } + Some(Scalar::Unsigned32(res)) + } } } // return the count of values on the column. NULL values do not contribute // to the count. 
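
// Aside: note that the nullable arms in the hunk below count `Some` values
// across the whole vector and ignore `from_row_id`/`to_row_id`; PATCH 60 later
// rescopes the scan to the requested range. The intended behaviour, sketched:

    fn count_nullable<T>(values: &[Option<T>], from: usize, to: usize) -> u64 {
        values[from..to].iter().filter(|v| v.is_some()).count() as u64
    }

    // Dense (non-null) variants need no scan at all:
    fn count_dense(from: usize, to: usize) -> u64 {
        (to - from) as u64
    }
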
- fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { match self { - Vector::String(vec) => vec.iter().filter(|x| x.is_some()).count(), - Vector::EncodedString(_) => to_row_id - from_row_id, // fast - no possible NULL values - Vector::Float(vec) => vec.iter().filter(|x| x.is_some()).count(), - Vector::Integer(vec) => vec.iter().filter(|x| x.is_some()).count(), + Self::NullString(vec) => { + let count = vec.iter().filter(|x| x.is_some()).count(); + count as u64 + } + Self::NullFloat(vec) => { + let count = vec.iter().filter(|x| x.is_some()).count(); + count as u64 + } + Self::NullInteger(vec) => { + let count = vec.iter().filter(|x| x.is_some()).count(); + count as u64 + } + Self::Float(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values + Self::Integer(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values + Self::Unsigned32(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values } } pub fn extend(&mut self, other: Self) { match self { - Self::String(v) => { - if let Self::String(other) = other { + Self::NullString(v) => { + if let Self::NullString(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::NullFloat(v) => { + if let Self::NullFloat(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::NullInteger(v) => { + if let Self::NullInteger(other) = other { v.extend(other); } else { unreachable!("string can't be extended"); @@ -282,8 +447,8 @@ impl<'a> Vector<'a> { unreachable!("string can't be extended"); } } - Vector::EncodedString(v) => { - if let Self::EncodedString(other) = other { + Self::Unsigned32(v) => { + if let Self::Unsigned32(other) = other { v.extend(other); } else { unreachable!("string can't be extended"); @@ -298,10 +463,12 @@ impl<'a> Vector<'a> { pub fn len(&self) -> usize { match self { - Self::String(v) => v.len(), + Self::NullString(v) => v.len(), + Self::NullFloat(v) => v.len(), + Self::NullInteger(v) => v.len(), Self::Float(v) => v.len(), Self::Integer(v) => v.len(), - Vector::EncodedString(v) => v.len(), + Self::Unsigned32(v) => v.len(), } } @@ -309,25 +476,54 @@ impl<'a> Vector<'a> { /// position `i` is NULL then `None` is returned. pub fn get(&self, i: usize) -> Value<'a> { match self { - Self::String(v) => match v[i] { + Self::NullString(v) => match v[i] { Some(v) => Value::String(v), None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), }, - Self::Float(v) => match v[i] { + Self::NullFloat(v) => match v[i] { Some(v) => Value::Scalar(Scalar::Float(v)), None => Value::Null, }, - Self::Integer(v) => match v[i] { + Self::NullInteger(v) => match v[i] { Some(v) => Value::Scalar(Scalar::Integer(v)), None => Value::Null, }, - Self::EncodedString(v) => Value::Scalar(Scalar::Integer(v[i])), + Self::Float(v) => Value::Scalar(Scalar::Float(v[i])), + Self::Integer(v) => Value::Scalar(Scalar::Integer(v[i])), + Self::Unsigned32(v) => Value::Scalar(Scalar::Unsigned32(v[i])), + } + } + + /// Return the value within the vector at position `i`. If the value at + /// position `i` is NULL then `None` is returned. 
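
// Aside: the accessor pattern used by `get` and `get_scalar`: a NULL slot maps
// to a sentinel (Value::Null or None) rather than panicking. Reduced sketch,
// with i64 standing in for the full set of scalar types:

    enum Value<'a> {
        Null,
        Scalar(&'a i64),
    }

    fn get<'a>(values: &'a [Option<i64>], i: usize) -> Value<'a> {
        match &values[i] {
            Some(v) => Value::Scalar(v),
            None => Value::Null,
        }
    }
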
+ // + // TODO - sort out + pub fn get_scalar(&self, i: usize) -> Option> { + match self { + Self::NullString(_) => panic!("unsupported get_scalar"), + Self::NullFloat(v) => match v[i] { + Some(v) => Some(Scalar::Float(v)), + None => None, + }, + Self::NullInteger(v) => match v[i] { + Some(v) => Some(Scalar::Integer(v)), + None => None, + }, + Self::Float(v) => Some(Scalar::Float(v[i])), + Self::Integer(v) => Some(Scalar::Integer(v[i])), + Self::Unsigned32(v) => Some(Scalar::Unsigned32(v[i])), } } pub fn swap(&mut self, a: usize, b: usize) { match self { - Self::String(v) => { + Self::NullString(v) => { + v.swap(a, b); + } + Self::NullFloat(v) => { + v.swap(a, b); + } + Self::NullInteger(v) => { v.swap(a, b); } Self::Float(v) => { @@ -336,7 +532,9 @@ impl<'a> Vector<'a> { Self::Integer(v) => { v.swap(a, b); } - Vector::EncodedString(v) => v.swap(a, b), + Self::Unsigned32(v) => { + v.swap(a, b); + } } } } @@ -352,8 +550,6 @@ impl AggregatableByRange for &Vector<'_> { } } -/// VectorIterator allows a `Vector` to be iterated. Until vectors are drained -/// Scalar values are emitted. pub struct VectorIterator<'a> { v: &'a Vector<'a>, next_i: usize, @@ -379,14 +575,44 @@ impl<'a> Iterator for VectorIterator<'a> { } } +/// NullVectorIterator allows a `Vector` to be iterated. Until vectors are +/// drained Scalar values are emitted. +/// +/// +/// TODO - need to figure this out - currently only returns scalars +pub struct NullVectorIterator<'a> { + v: &'a Vector<'a>, + next_i: usize, +} + +impl<'a> NullVectorIterator<'a> { + pub fn new(v: &'a Vector<'a>) -> Self { + Self { v, next_i: 0 } + } +} +impl<'a> Iterator for NullVectorIterator<'a> { + type Item = Option>; + + fn next(&mut self) -> Option { + let curr_i = self.next_i; + self.next_i += 1; + + if curr_i == self.v.len() { + return None; + } + + Some(self.v.get_scalar(curr_i)) + } +} + use chrono::prelude::*; impl<'a> std::fmt::Display for Vector<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::String(v) => write!(f, "{:?}", v), - Self::Float(v) => write!(f, "{:?}", v), - Self::Integer(v) => { + Self::NullString(v) => write!(f, "{:?}", v), + Self::NullFloat(v) => write!(f, "{:?}", v), + Self::NullInteger(v) => { for x in v.iter() { match x { Some(x) => { @@ -398,7 +624,16 @@ impl<'a> std::fmt::Display for Vector<'a> { } Ok(()) } - Vector::EncodedString(v) => write!(f, "{:?}", v), + Self::Float(v) => write!(f, "{:?}", v), + Self::Integer(v) => { + // TODO(edd) remove as this is timestamp specific + for x in v.iter() { + let ts = NaiveDateTime::from_timestamp(*x / 1000 / 1000, 0); + write!(f, "{}, ", ts)?; + } + Ok(()) + } + Self::Unsigned32(v) => write!(f, "{:?}", v), } } } @@ -474,10 +709,10 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - return Vector::String(vec![]); + return Vector::NullString(vec![]); } - Vector::String(c.values(row_ids)) + Vector::NullString(c.values(row_ids)) } Column::Float(c) => { if row_ids.is_empty() { @@ -488,7 +723,7 @@ impl Column { let v = c.values(row_ids); log::debug!("time getting decoded values for float {:?}", now.elapsed()); - Vector::Float(v) + Vector::NullFloat(v) } Column::Integer(c) => { if row_ids.is_empty() { @@ -498,7 +733,7 @@ impl Column { let now = std::time::Instant::now(); let v = c.values(row_ids); log::debug!("time getting decoded values for int {:?}", now.elapsed()); - Vector::Integer(v) + Vector::NullInteger(v) } } } @@ -509,7 +744,7 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - 
return Vector::String(vec![]); + return Vector::NullString(vec![]); } let row_id_vec = row_ids @@ -517,7 +752,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - Vector::String(c.values(&row_id_vec)) + Vector::NullString(c.values(&row_id_vec)) } Column::Float(c) => { if row_ids.is_empty() { @@ -529,7 +764,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - Vector::Float(c.values(&row_id_vec)) + Vector::NullFloat(c.values(&row_id_vec)) } Column::Integer(c) => { if row_ids.is_empty() { @@ -541,7 +776,7 @@ impl Column { .iter() .map(|v| *v as usize) .collect::>(); - Vector::Integer(c.values(&row_id_vec)) + Vector::NullInteger(c.values(&row_id_vec)) } } } @@ -560,13 +795,13 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - return Vector::Integer(vec![]); + return Vector::Unsigned32(vec![]); } let now = std::time::Instant::now(); let v = c.encoded_values(&row_ids_vec); log::debug!("time getting encoded values {:?}", now.elapsed()); - Vector::EncodedString(v) + Vector::Unsigned32(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -591,7 +826,7 @@ impl Column { match self { Column::String(c) => { if row_ids.is_empty() { - return Vector::Integer(vec![]); + return Vector::Unsigned32(vec![]); } let now = std::time::Instant::now(); @@ -599,7 +834,7 @@ impl Column { log::debug!("time getting encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::EncodedString(v) + Vector::Unsigned32(v) } Column::Float(c) => { if row_ids.is_empty() { @@ -627,7 +862,7 @@ impl Column { log::debug!("time getting all encoded values {:?}", now.elapsed()); log::debug!("dictionary {:?}", c.data.dictionary()); - Vector::EncodedString(v) + Vector::Unsigned32(v) } Column::Float(c) => Vector::Float(c.all_encoded_values()), Column::Integer(c) => Vector::Integer(c.all_encoded_values()), @@ -671,9 +906,9 @@ impl Column { row_ids_vec[0] ); match self { - Column::String(c) => Vector::String(c.values(&row_ids_vec)), - Column::Float(c) => Vector::Float(c.values(&row_ids_vec)), - Column::Integer(c) => Vector::Integer(c.values(&row_ids_vec)), + Column::String(c) => Vector::NullString(c.values(&row_ids_vec)), + Column::Float(c) => Vector::NullFloat(c.values(&row_ids_vec)), + Column::Integer(c) => Vector::NullInteger(c.values(&row_ids_vec)), } } @@ -839,7 +1074,7 @@ impl Column { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match agg_type { AggregateType::Count => { - Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id) as u64) + Aggregate::Count(c.count_by_id_range(from_row_id, to_row_id)) } AggregateType::Sum => match c.sum_by_id_range(from_row_id, to_row_id) { Some(sum) => Aggregate::Sum(Some(Scalar::Float(sum))), @@ -1071,11 +1306,11 @@ impl String { self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } - pub fn all_encoded_values(&self) -> Vec { + pub fn all_encoded_values(&self) -> Vec { self.data.all_encoded_values() } @@ -1288,7 +1523,7 @@ where impl NumericColumn where - T: Clone + std::cmp::PartialOrd + std::fmt::Debug, + T: Copy + Clone + std::cmp::PartialOrd + std::fmt::Debug, { pub fn column_range(&self) -> &Option<(T, T)> { self.meta.range() @@ -1306,11 +1541,11 @@ where self.data.values(row_ids) } - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec> { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { self.data.encoded_values(row_ids) } - pub fn 
all_encoded_values(&self) -> Vec> { + pub fn all_encoded_values(&self) -> Vec { self.data.all_encoded_values() } @@ -1334,7 +1569,7 @@ where self.data.sum_by_id_range(from_row_id, to_row_id) } - pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> usize { + pub fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { self.data.count_by_id_range(from_row_id, to_row_id) } } @@ -1372,10 +1607,10 @@ pub mod metadata { } fn update_range(&mut self, v: T) { - match self.range { + match &mut self.range { Some(range) => { if v < range.0 { - range.0 = v; + range.0 = v.clone(); } if v > range.1 { @@ -1383,7 +1618,7 @@ pub mod metadata { } } None => { - self.range = Some((v, v)); + self.range = Some((v.clone(), v)); } } } @@ -1409,7 +1644,7 @@ pub mod metadata { } pub fn maybe_contains_value(&self, v: T) -> bool { - match self.range { + match &self.range { Some(range) => range.0 <= v && v <= range.1, None => false, } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 1945ae3224..48f00182ae 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -89,29 +89,11 @@ where /// /// encoded_values should not be called on nullable columns. fn encoded_values(&self, row_ids: &[usize]) -> Vec { - // assertion here during development to check this isn't called on - // encodings that can have null values. - assert_eq!(self.arr.null_count(), 0); - - let mut out = Vec::with_capacity(row_ids.len()); - for &row_id in row_ids { - out.push(self.arr.value(row_id)); - } - assert_eq!(out.len(), row_ids.len()); - out + panic!("encoded_values not implemented yet"); } fn all_encoded_values(&self) -> Vec { - // assertion here during development to check this isn't called on - // encodings that can have null values. - assert_eq!(self.arr.null_count(), 0); - - let mut out = Vec::with_capacity(self.arr.len()); - for i in 0..self.arr.len() { - out.push(self.arr.value(i)); - } - assert_eq!(out.len(), self.arr.len()); - out + panic!("all_encoded_values not implemented yet"); } // TODO(edd): problem here is returning a slice because we need to own the @@ -304,7 +286,7 @@ where /// Return the raw encoded values for the provided logical row ids. For Plain /// encoding this is just the decoded values. - fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, row_ids: &[usize]) -> Vec { let mut out = Vec::with_capacity(row_ids.len()); for chunks in row_ids.chunks_exact(4) { out.push(self.values[chunks[3]]); @@ -324,7 +306,7 @@ where /// Return all encoded values. For this encoding this is just the decoded /// values - fn all_encoded_values(&self) -> Vec { + fn all_encoded_values(&self) -> Vec { self.values.clone() // TODO(edd):perf probably can return reference to vec. } @@ -723,8 +705,8 @@ impl DictionaryRLE { /// /// TODO(edd): return type is wrong but I'm making it fit /// - pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { - let mut out: Vec = Vec::with_capacity(row_ids.len()); + pub fn encoded_values(&self, row_ids: &[usize]) -> Vec { + let mut out = Vec::with_capacity(row_ids.len()); let mut curr_logical_row_id = 0; @@ -746,7 +728,7 @@ impl DictionaryRLE { } // this entry covers the row_id we want. - out.push(curr_entry_id as i64); + out.push(curr_entry_id as u32); curr_logical_row_id += 1; curr_entry_rl -= 1; } @@ -757,11 +739,11 @@ impl DictionaryRLE { // all_encoded_values materialises a vector of all encoded values for the // column. 
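
// Aside: for the RLE encoding this materialisation is just run expansion: each
// (entry id, run length) pair becomes `run length` copies of the id. A
// self-contained sketch of the loop in the hunk below:

    use std::iter;

    fn expand_runs(run_lengths: &[(usize, u64)], total: usize) -> Vec<u32> {
        let mut out = Vec::with_capacity(total);
        for (idx, rl) in run_lengths {
            out.extend(iter::repeat(*idx as u32).take(*rl as usize));
        }
        out
    }

    // expand_runs(&[(0, 3), (1, 2)], 5) == vec![0, 0, 0, 1, 1]
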
- pub fn all_encoded_values(&self) -> Vec { - let mut out: Vec = Vec::with_capacity(self.total as usize); + pub fn all_encoded_values(&self) -> Vec { + let mut out = Vec::with_capacity(self.total as usize); for (idx, rl) in &self.run_lengths { - out.extend(iter::repeat(*idx as i64).take(*rl as usize)); + out.extend(iter::repeat(*idx as u32).take(*rl as usize)); } out } @@ -863,8 +845,8 @@ mod test { arr: super::PrimitiveArray::from(vec![Some(2.3), Some(44.56), None]), }; - let encoded = col.all_encoded_values(); - assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); + // let encoded = col.all(); + // assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); let sum = col.sum_by_id_range(0, 1); assert_eq!(sum, Some(46.86)); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 9255817eaa..07bc024145 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -78,7 +78,8 @@ impl Segment { // TODO(edd) yuk if name == "time" { if let column::Column::Integer(ts) = &c { - self.meta.time_range = ts.column_range(); + // Right now assumption is ts column has some non-null values + self.meta.time_range = ts.column_range().unwrap(); } else { panic!("incorrect column type for time"); } @@ -316,12 +317,10 @@ impl Segment { // filtering stage we will just emit None. let mut group_itrs = group_column_encoded_values .iter() - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } + .map(|vector| match vector { + column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column + _ => panic!("don't support grouping on non-encoded values or timestamps"), }) .collect::>(); @@ -331,7 +330,10 @@ impl Segment { let mut aggregate_itrs = aggregate_column_decoded_values .iter() .map(|(col_name, values)| match values { - Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), + Some(values) => ( + col_name.as_str(), + Some(column::NullVectorIterator::new(values)), + ), None => (col_name.as_str(), None), }) .collect::>(); @@ -339,7 +341,7 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&'a String, &'a AggregateType, Option>)>, + Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)>, > = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option>)> = @@ -355,29 +357,50 @@ impl Segment { group_itrs.iter_mut().enumerate().for_each(|(i, itr)| { if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - group_key[i] = itr.next().unwrap() / window * window; + if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + group_key[i] = v / window * window; + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); + } + } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = + itr.next() + { + group_key[i] = v as i64 } else { - group_key[i] = *itr.next().unwrap(); + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); } }); // re-use aggregate_row vector. for (i, &mut (col_name, ref mut itr)) in aggregate_itrs.iter_mut().enumerate() { match itr { - Some(itr) => aggregate_row[i] = (col_name, itr.next()), + Some(itr) => { + // This is clunky. 
We don't need to check for the sentinel None value + // to indicate the end of the iterator because we use the guard in + // the while loop to do so. + aggregate_row[i] = (col_name, itr.next().unwrap_or(None)); + } None => aggregate_row[i] = (col_name, None), } } // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<( - &'a String, - &'a AggregateType, - Option>, - )> = Vec::with_capacity(aggregates.len()); + let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)> = + Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { - agg_results.push((col_name, agg_type, None)); // switch out Aggregate for Option + agg_results.push(( + col_name, + agg_type, + match agg_type { + AggregateType::Count => column::Aggregate::Count(0), + AggregateType::Sum => column::Aggregate::Sum(None), + }, + )); } hash_table.insert(group_key.clone(), agg_results); } @@ -395,28 +418,39 @@ impl Segment { continue; } - // TODO(edd): remove unwrap - it should work because we are - // tracking iteration count in loop. - let row_value = row_value.as_ref().unwrap(); - match cum_agg_value { - Some(agg) => match agg { - column::Aggregate::Count(cum_count) => { - *cum_count += 1; - } - column::Aggregate::Sum(cum_sum) => { - *cum_sum += row_value; - } - }, - None => { - *cum_agg_value = match agg_type { - AggregateType::Count => Some(column::Aggregate::Count(0)), - AggregateType::Sum => { - Some(column::Aggregate::Sum(row_value.clone())) + column::Aggregate::Count(x) => { + *x += 1; + } + column::Aggregate::Sum(v) => { + if let Some(row_value) = row_value { + match v { + Some(x) => { + *x += row_value; + } + None => *v = Some(row_value.clone()), } } } } + // match cum_agg_value { + // Some(agg) => match agg { + // column::Aggregate::Count(_) => { + // *cum_agg_value = Some(agg + column::Aggregate::Count(Some(1))); + // } + // column::Aggregate::Sum(cum_sum) => { + // *cum_sum += row_value; + // } + // }, + // None => { + // *cum_agg_value = match agg_type { + // AggregateType::Count => Some(column::Aggregate::Count(Some(0))), + // AggregateType::Sum => { + // Some(column::Aggregate::Sum(row_value.clone())) + // } + // } + // } + // } } } processed_rows += 1; @@ -757,10 +791,6 @@ impl Segment { } // Returns the count aggregate for a given column name. - // - // Since we guarantee to provide row ids for the segment, and all columns - // have the same number of logical rows, the count is just the number of - // requested logical rows. 
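
// Aside: the comment being deleted above still describes the fast path in the
// function below: all columns in a segment share the same number of logical
// rows, so a count aggregate over a filtered row set is just the cardinality
// of the row-id bitmap, with no column scan. Sketch against the croaring
// bitmap API this crate already uses:

    fn count_rows(row_ids: &croaring::Bitmap) -> u64 {
        row_ids.cardinality()
    }
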
pub fn count_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if self.column(name).is_some() { return Some(row_ids.cardinality() as u64); @@ -899,8 +929,8 @@ impl Segment { aggs.push(( (col_name.to_string(), agg.clone()), column::Aggregate::Sum( - self.sum_column(col_name, &mut filtered_row_ids).unwrap(), - ), // assuming no non-null group keys + self.sum_column(col_name, &mut filtered_row_ids), + ), )); } AggregateType::Count => { @@ -908,7 +938,7 @@ impl Segment { (col_name.to_string(), agg.clone()), column::Aggregate::Count( self.count_column(col_name, &mut filtered_row_ids).unwrap(), - ), // assuming no non-null group keys + ), )); } } @@ -1392,7 +1422,7 @@ impl<'a> Segments<'a> { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwrap - let min_ts = ts_col.column_range().0; + let min_ts = ts_col.column_range().unwrap().0; assert_eq!(min_ts, segment.meta.time_range.0); let min_ts_id = ts_col.row_id_eq_value(min_ts).unwrap(); @@ -1424,7 +1454,7 @@ impl<'a> Segments<'a> { // first find the logical row id of the minimum timestamp value if let Column::Integer(ts_col) = &segment.columns[segment.time_column_idx] { // TODO(edd): clean up unwrap - let max_ts = ts_col.column_range().1; + let max_ts = ts_col.column_range().unwrap().1; assert_eq!(max_ts, segment.meta.time_range.1); let max_ts_id = ts_col.row_id_eq_value(max_ts).unwrap(); diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index fb592b87ad..55ae124ea6 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -39,7 +39,7 @@ pub enum Error { /// comparison scan performed on them to ensure they're not already sorted. const SORTED_CHECK_SIZE: usize = 1000; -/// Sort a slice of `Packers` based on the provided column indexes. +/// Sort a slice of `Vector` based on the provided column indexes. /// /// All chosen columns will be sorted in ascending order; the sort is *not* /// stable. @@ -77,9 +77,6 @@ pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(), log::debug!("columns already sorted"); return Ok(()); } - // if vectors_sorted_asc(vectors, n, sort_by) { - // return Ok(()); - // } } let now = std::time::Instant::now(); quicksort_by(vectors, 0..n - 1, sort_by); @@ -136,7 +133,7 @@ fn partition(vectors: &mut [column::Vector<'_>], range: &Range, sort_by: fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering { for &idx in sort_by { match &vectors[idx] { - column::Vector::String(p) => { + column::Vector::NullString(p) => { let cmp = p.get(a).cmp(&p.get(b)); if cmp != Ordering::Equal { return cmp; @@ -150,7 +147,7 @@ fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> } // if cmp equal then try next vector. 
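
// Aside: the comparator walks the sort-key columns in order and only falls
// through to the next column on a tie, i.e. an ordinary lexicographic
// comparison. Reduced to plain integer columns:

    use std::cmp::Ordering;

    fn cmp_rows(cols: &[Vec<i64>], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
        for &idx in sort_by {
            match cols[idx][a].cmp(&cols[idx][b]) {
                Ordering::Equal => continue, // tie: compare the next sort column
                other => return other,
            }
        }
        Ordering::Equal // rows are equal on every sort column
    }
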
} - _ => continue, // don't compare on non-string / timestamp cols + _ => unimplemented!("todo!"), // don't compare on non-string / timestamp cols } } Ordering::Equal @@ -161,7 +158,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi 'row_wise: for i in 1..len { for &idx in sort_by { match &vectors[idx] { - column::Vector::String(vec) => { + column::Vector::NullString(vec) => { if vec[i - 1] < vec[i] { continue 'row_wise; } else if vec[i - 1] == vec[i] { @@ -183,7 +180,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi return false; } } - _ => continue, // don't compare on non-string / timestamp cols + _ => unimplemented!("todo!"), // don't compare on non-string / timestamp cols } } } From 4f12e151d6e44f8fbe217c2f9bb0d8c4b01a754d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 17 Sep 2020 09:50:17 +0100 Subject: [PATCH 59/73] refactor: running --- delorean_mem_qe/src/bin/main.rs | 42 +++++--- delorean_mem_qe/src/column.rs | 183 ++++++++++++++++++++++++++++++-- delorean_mem_qe/src/encoding.rs | 15 +++ delorean_mem_qe/src/segment.rs | 95 +++++++++++++---- delorean_mem_qe/src/sorter.rs | 47 ++++---- 5 files changed, 322 insertions(+), 60 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 90acdd167f..40e446dd02 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 364 { - i += 1; - continue; - } + // if i < 364 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -134,7 +134,7 @@ fn build_store( let mut segment = Segment::new(rb.num_rows(), schema); convert_record_batch(rb, &mut segment)?; - println!("{}", &segment); + // println!("{}", &segment); store.add_segment(segment); } Ok(None) => { @@ -166,30 +166,46 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er .as_any() .downcast_ref::() .unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); + + // TODO(edd): figure out how to get ownership here without + // cloning + // let arr: array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { if column.null_count() > 0 { - panic!("null times"); + panic!("null integers not expected in testing"); } let arr = column.as_any().downcast_ref::().unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); + + // TODO(edd): figure out how to get ownership here without + // cloning + // let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => { if column.null_count() > 0 { - panic!("null times"); + panic!("null timestamps not expected in testing"); } let arr = column .as_any() .downcast_ref::() .unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); segment.add_column(rb.schema().field(i).name(), column); + + // TODO(edd): figure out how to get ownership here without + // cloning + // let arr: 
array::TimestampMicrosecondArray = + // arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { let arr = column @@ -469,9 +485,9 @@ fn time_group_single_with_pred(store: &Store) { fn time_group_by_multi_agg_count(store: &Store) { let strats = vec![ GroupingStrategy::HashGroup, - // GroupingStrategy::HashGroupConcurrent, + GroupingStrategy::HashGroupConcurrent, GroupingStrategy::SortGroup, - // GroupingStrategy::SortGroupConcurrent, + GroupingStrategy::SortGroupConcurrent, ]; for strat in &strats { @@ -520,7 +536,7 @@ fn time_group_by_multi_agg_sorted_count(store: &Store) { ]; for strat in &strats { - let repeat = 1; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 3b27743c2b..c76ce588e3 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1251,17 +1251,36 @@ impl AggregatableByRange for &Column { } } -// impl From<&[f64]> for Column { -// fn from(values: &[f64]) -> Self { -// Self::Float(Float::from(values)) -// } -// } +use arrow::array::{Float64Array, Int64Array, TimestampMicrosecondArray}; +impl From for Column { + fn from(arr: arrow::array::Float64Array) -> Self { + Self::Float(NumericColumn::from(arr)) + } +} -// impl From<&[i64]> for Column { -// fn from(values: &[i64]) -> Self { -// Self::Integer(Integer::from(values)) -// } -// } +impl From for Column { + fn from(arr: TimestampMicrosecondArray) -> Self { + Self::Integer(NumericColumn::from(arr)) + } +} + +impl From for Column { + fn from(arr: Int64Array) -> Self { + Self::Integer(NumericColumn::from(arr)) + } +} + +impl From<&[f64]> for Column { + fn from(values: &[f64]) -> Self { + Self::Float(NumericColumn::from(values)) + } +} + +impl From<&[i64]> for Column { + fn from(values: &[i64]) -> Self { + Self::Integer(NumericColumn::from(values)) + } +} #[derive(Debug, Default)] pub struct String { @@ -1583,6 +1602,150 @@ where } } +use arrow::array::Array; +impl From for NumericColumn { + fn from(arr: arrow::array::Float64Array) -> Self { + let len = arr.len(); + let mut range: Option<(f64, f64)> = None; + + // calculate min/max for meta data + // TODO(edd): can use compute kernels for this. + for i in 0..arr.len() { + if arr.is_null(i) { + continue; + } + + let v = arr.value(i); + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainArrow::new(arr)), + } + } +} + +impl From for NumericColumn { + fn from(arr: arrow::array::Int64Array) -> Self { + let len = arr.len(); + let mut range: Option<(i64, i64)> = None; + + // calculate min/max for meta data + // TODO(edd): can use compute kernels for this. 
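
// Aside: the min/max scans in these From impls match on `Some(mut range)`,
// which copies the (Copy) tuple out of the Option, so the updates are silently
// lost; PATCH 60 below fixes this with `ref mut`. The intended scan, sketched
// over a plain nullable slice:

    fn min_max(values: &[Option<i64>]) -> Option<(i64, i64)> {
        let mut range: Option<(i64, i64)> = None;
        for v in values.iter().flatten() {
            match &mut range {
                Some(r) => {
                    r.0 = r.0.min(*v);
                    r.1 = r.1.max(*v);
                }
                None => range = Some((*v, *v)),
            }
        }
        range
    }

    // min_max(&[None, Some(3), Some(-1)]) == Some((-1, 3))
    // min_max(&[None, None])              == None
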
+ for i in 0..arr.len() { + if arr.is_null(i) { + continue; + } + + let v = arr.value(i); + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainArrow::new(arr)), + } + } +} + +impl From for NumericColumn { + fn from(arr: arrow::array::TimestampMicrosecondArray) -> Self { + let len = arr.len(); + let mut range: Option<(i64, i64)> = None; + + // calculate min/max for meta data + // TODO(edd): can use compute kernels for this. + for i in 0..arr.len() { + if arr.is_null(i) { + continue; + } + + let v = arr.value(i); + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainArrow::new(arr)), + } + } +} + +impl From<&[f64]> for NumericColumn { + fn from(values: &[f64]) -> Self { + let len = values.len(); + let mut range: Option<(f64, f64)> = None; + + // calculate min/max for meta data + for &v in values { + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainFixed::from(values)), + } + } +} + +impl From<&[i64]> for NumericColumn { + fn from(values: &[i64]) -> Self { + let len = values.len(); + let mut range: Option<(i64, i64)> = None; + + // calculate min/max for meta data + for &v in values { + match range { + Some(mut range) => { + range.0 = range.0.min(v); + range.1 = range.1.max(v); + } + None => { + range = Some((v, v)); + } + } + } + + Self { + meta: metadata::Metadata::new(range, len), + data: Box::new(encoding::PlainFixed::from(values)), + } + } +} + pub mod metadata { use std::mem::size_of; diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 48f00182ae..e27df1ae58 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -48,6 +48,21 @@ where arr: PrimitiveArray, } +impl PlainArrow +where + T: ArrowNumericType + std::fmt::Debug, + T::Native: Default + + PartialEq + + PartialOrd + + Copy + + std::fmt::Debug + + std::ops::Add, +{ + pub fn new(arr: PrimitiveArray) -> Self { + Self { arr } + } +} + impl NumericEncoding for PlainArrow where T: ArrowNumericType + std::fmt::Debug, diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 07bc024145..b8626ea164 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -566,15 +566,25 @@ impl Segment { } log::debug!("time checking sort {:?}", now.elapsed()); + // let group_itrs = all_columns + // .iter() + // .take(group_columns.len()) // only use grouping columns + // .map(|vector| { + // if let column::Vector::Integer(v) = vector { + // v.iter() + // } else { + // panic!("don't support grouping on non-encoded values"); + // } + // }) + // .collect::>(); + let group_itrs = all_columns .iter() - .take(group_columns.len()) // only use grouping columns - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } + .take(group_columns.len()) + .map(|vector| match vector { + column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Integer(_) => 
column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column + _ => panic!("don't support grouping on non-encoded values or timestamps"), }) .collect::>(); @@ -653,17 +663,26 @@ impl Segment { } } - let group_itrs = group_column_encoded_values + let mut group_itrs = group_column_encoded_values .iter() - .map(|vector| { - if let column::Vector::Integer(v) = vector { - v.iter() - } else { - panic!("don't support grouping on non-encoded values"); - } + .map(|vector| match vector { + column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column + _ => panic!("don't support grouping on non-encoded values or timestamps"), }) .collect::>(); + // let group_itrs = group_column_encoded_values + // .iter() + // .map(|vector| { + // if let column::Vector::Integer(v) = vector { + // v.iter() + // } else { + // panic!("don't support grouping on non-encoded values"); + // } + // }) + // .collect::>(); + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); for (column_name, agg_type) in aggregates { aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); @@ -676,7 +695,7 @@ impl Segment { // available and appropriately sorted this method will build a result set of // aggregates in a streaming way. pub fn stream_grouped_aggregates<'a>( - mut group_itrs: Vec>, + mut group_itrs: Vec>, aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, total_rows: usize, window: i64, @@ -688,11 +707,30 @@ impl Segment { .iter_mut() .enumerate() .map(|(i, itr)| { + // if i == group_itrs_len - 1 && window > 0 { + // // time column - apply window function + // return itr.next().unwrap() / window * window; + // } + // *itr.next().unwrap() + if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - return itr.next().unwrap() / window * window; + if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + v / window * window + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); + } + } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = + itr.next() + { + v as i64 + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); } - *itr.next().unwrap() }) .collect::>(); @@ -713,12 +751,31 @@ impl Segment { .zip(group_itrs.iter_mut()) .enumerate() { + // let next_v = if i == group_itrs_len - 1 && window > 0 { + // // time column - apply window function + // itr.next().unwrap() / window * window + // } else { + // *itr.next().unwrap() + // }; let next_v = if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - itr.next().unwrap() / window * window + if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + v / window * window + } else { + unreachable!( + "something broken with grouping! Either processed None or wrong type" + ); + } + } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = + itr.next() + { + v as i64 } else { - *itr.next().unwrap() + unreachable!( + "something broken with grouping! 
Either processed None or wrong type" + ); }; + if curr_v != &next_v { group_key_changed = true; } diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index 55ae124ea6..7dd7d6ced3 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -133,7 +133,7 @@ fn partition(vectors: &mut [column::Vector<'_>], range: &Range, sort_by: fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering { for &idx in sort_by { match &vectors[idx] { - column::Vector::NullString(p) => { + column::Vector::Unsigned32(p) => { let cmp = p.get(a).cmp(&p.get(b)); if cmp != Ordering::Equal { return cmp; @@ -158,27 +158,38 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi 'row_wise: for i in 1..len { for &idx in sort_by { match &vectors[idx] { - column::Vector::NullString(vec) => { - if vec[i - 1] < vec[i] { - continue 'row_wise; - } else if vec[i - 1] == vec[i] { - // try next column - continue; - } else { - // value is > so - return false; + column::Vector::Unsigned32(vec) => { + match vec[i - 1].cmp(&vec[i]) { + Ordering::Less => continue 'row_wise, + Ordering::Equal => continue, + Ordering::Greater => return false, } + // if vec[i - 1] < vec[i] { + // continue 'row_wise; + // } else if vec[i - 1] == vec[i] { + // // try next column + // continue; + // } else { + // // value is > so + // return false; + // } } column::Vector::Integer(vec) => { - if vec[i - 1] < vec[i] { - continue 'row_wise; - } else if vec[i - 1] == vec[i] { - // try next column - continue; - } else { - // value is > so - return false; + match vec[i - 1].cmp(&vec[i]) { + Ordering::Less => continue 'row_wise, + Ordering::Equal => continue, + Ordering::Greater => return false, } + + // if vec[i - 1] < vec[i] { + // continue 'row_wise; + // } else if vec[i - 1] == vec[i] { + // // try next column + // continue; + // } else { + // // value is > so + // return false; + // } } _ => unimplemented!("todo!"), // don't compare on non-string / timestamp cols } From 751fa013e7dbd42844c402467da092e7c7f59402 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 17 Sep 2020 21:52:50 +0100 Subject: [PATCH 60/73] fix: fix some bugs --- delorean_mem_qe/src/bin/main.rs | 30 ++++++++--------- delorean_mem_qe/src/column.rs | 59 +++++++++++++++++++++++++-------- delorean_mem_qe/src/encoding.rs | 27 ++++++++++++--- delorean_mem_qe/src/segment.rs | 23 +++++++------ 4 files changed, 95 insertions(+), 44 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 40e446dd02..2061b229da 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -63,14 +63,14 @@ fn main() { ); let store = Arc::new(store); - time_select_with_pred(&store); - time_datafusion_select_with_pred(store.clone()); - time_first_host(&store); - time_sum_range(&store); - time_count_range(&store); - time_group_single_with_pred(&store); - time_group_by_multi_agg_count(&store); - time_group_by_multi_agg_sorted_count(&store); + // time_select_with_pred(&store); + // time_datafusion_select_with_pred(store.clone()); + // time_first_host(&store); + // time_sum_range(&store); + // time_count_range(&store); + // time_group_single_with_pred(&store); + // time_group_by_multi_agg_count(&store); + // time_group_by_multi_agg_sorted_count(&store); time_window_agg_count(&store); // time_group_by_different_columns(&store); } @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), 
Ok(Some(rb)) => { - // if i < 364 { - // i += 1; - // continue; - // } + if i < 364 { + i += 1; + continue; + } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -134,7 +134,7 @@ fn build_store( let mut segment = Segment::new(rb.num_rows(), schema); convert_record_batch(rb, &mut segment)?; - // println!("{}", &segment); + log::debug!("{}", &segment); store.add_segment(segment); } Ok(None) => { @@ -499,7 +499,7 @@ fn time_group_by_multi_agg_count(store: &Store) { let now = std::time::Instant::now(); let groups = segments.read_group_eq( - (1589000000000001, 1590044410000000), + (1589000000000001, 1590044410000001), &[], vec!["status".to_string(), "method".to_string()], vec![("counter".to_string(), AggregateType::Count)], @@ -575,7 +575,7 @@ fn time_window_agg_count(store: &Store) { ]; for strat in &strats { - let repeat = 1; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut total_max = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index c76ce588e3..61081d2de2 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -393,20 +393,41 @@ impl<'a> Vector<'a> { fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { match self { Self::NullString(vec) => { - let count = vec.iter().filter(|x| x.is_some()).count(); + let mut count = 0; + for v in &vec[from_row_id..to_row_id] { + if v.is_some() { + count += 1; + } + } count as u64 } Self::NullFloat(vec) => { - let count = vec.iter().filter(|x| x.is_some()).count(); + let mut count = 0; + for v in &vec[from_row_id..to_row_id] { + if v.is_some() { + count += 1; + } + } count as u64 } Self::NullInteger(vec) => { - let count = vec.iter().filter(|x| x.is_some()).count(); + let mut count = 0; + for v in &vec[from_row_id..to_row_id] { + if v.is_some() { + count += 1; + } + } count as u64 } - Self::Float(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values - Self::Integer(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values - Self::Unsigned32(vec) => (to_row_id - from_row_id) as u64, // fast - no possible NULL values + Self::Float(_) => { + (to_row_id - from_row_id) as u64 // fast - no possible NULL values + } + Self::Integer(_) => { + (to_row_id - from_row_id) as u64 // fast - no possible NULL values + } + Self::Unsigned32(_) => { + (to_row_id - from_row_id) as u64 // fast - no possible NULL values + } } } @@ -705,6 +726,13 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids. + // + // FIXME(edd): we need to provide an API on an encoding to return raw_values + // so that we can return non-null vectors when we know the underlying encoding + // doesn't contain any null values. Right now we return nullable vectors, w + // which take up more memory and mean we can't do fast counts (since we need + // to check each value is non-null). 
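
// Aside: the FIXME above is the trade-off the Vector enum split is chasing: a
// dense Vec<f64> sums and counts in a tight, vectorisable loop, while
// Vec<Option<f64>> pays a branch per element. Sketch of the two sum paths:

    fn sum_dense(values: &[f64], from: usize, to: usize) -> f64 {
        values[from..to].iter().sum()
    }

    fn sum_nullable(values: &[Option<f64>], from: usize, to: usize) -> Option<f64> {
        let mut sum = None;
        for v in values[from..to].iter().flatten() {
            *sum.get_or_insert(0.0) += v;
        }
        sum
    }
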
+ // pub fn values(&self, row_ids: &[usize]) -> Vector<'_> { match self { Column::String(c) => { @@ -1227,13 +1255,13 @@ impl std::fmt::Display for Column { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self { Column::String(c) => { - write!(f, "{}", c)?; + write!(f, "[String Column]: {}", c)?; } Column::Float(c) => { - write!(f, "{}", c)?; + write!(f, "[Float Column]:{}", c)?; } Column::Integer(c) => { - write!(f, "{}", c)?; + write!(f, "[Integer Column]: {}", c)?; } } Ok(()) @@ -1617,7 +1645,7 @@ impl From for NumericColumn { let v = arr.value(i); match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1648,7 +1676,7 @@ impl From for NumericColumn { let v = arr.value(i); match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1679,7 +1707,7 @@ impl From for NumericColumn { let v = arr.value(i); match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1704,7 +1732,10 @@ impl From<&[f64]> for NumericColumn { // calculate min/max for meta data for &v in values { match range { - Some(mut range) => { + // wow this ref totally confused me for a while. Without it + // the code will compile fine but the range option will never + // reflect changes because the tuple range will be a copy. + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } @@ -1729,7 +1760,7 @@ impl From<&[i64]> for NumericColumn { // calculate min/max for meta data for &v in values { match range { - Some(mut range) => { + Some(ref mut range) => { range.0 = range.0.min(v); range.1 = range.1.max(v); } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e27df1ae58..ae9085e531 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -217,7 +217,13 @@ where + std::ops::Add, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[PlainArrow] size: {}", self.size()) + write!( + f, + "[PlainArrow] rows: {:?}, nulls: {:?}, size: {}", + self.arr.len(), + self.arr.null_count(), + self.size() + ) } } @@ -245,7 +251,12 @@ where + std::ops::AddAssign, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[PlainFixed] size: {}", self.size(),) + write!( + f, + "[PlainFixed] rows: {:?}, size: {}", + self.values.len(), + self.size() + ) } } @@ -481,6 +492,7 @@ pub struct DictionaryRLE { // of times the entry repeats. run_lengths: Vec<(usize, u64)>, + nulls: u64, total: u64, } @@ -492,6 +504,7 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), + nulls: 0, total: 0, } } @@ -503,6 +516,7 @@ impl DictionaryRLE { index_entry: BTreeMap::new(), map_size: 0, run_lengths: Vec::new(), + nulls: 0, total: 0, }; @@ -514,7 +528,7 @@ impl DictionaryRLE { .index_row_ids .insert(next_idx as u32, croaring::Bitmap::create()); - _self.run_lengths.push((next_idx, 0)); // could this cause a bug?ta + _self.run_lengths.push((next_idx, 0)); // could this cause a bug? 
} _self } @@ -568,6 +582,9 @@ impl DictionaryRLE { } } self.total += additional; + if v.is_none() { + self.nulls += additional; + } } // row_ids returns an iterator over the set of row ids matching the provided @@ -817,7 +834,9 @@ impl std::fmt::Display for DictionaryRLE { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "[DictionaryRLE] size: {}, dict entries: {}, runs: {} ", + "[DictionaryRLE] rows: {:?} nulls: {:?}, size: {}, dict entries: {}, runs: {} ", + self.total, + self.nulls, self.size(), self.index_entry.len(), self.run_lengths.len() diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index b8626ea164..99cf527d0b 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -229,8 +229,7 @@ impl Segment { group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, Option>)>> - { + ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate<'a>)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -497,6 +496,7 @@ impl Segment { .iter() .map(|v| *v as usize) .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); // materialise all encoded values for the matching rows in the columns // we are grouping on and store each group as an iterator. @@ -557,15 +557,13 @@ impl Segment { } let now = std::time::Instant::now(); - if self.group_key_sorted(group_columns) { - panic!("This shouldn't be called!!!"); - } else { - // now sort on the first grouping columns. Right now the order doesn't matter... - let group_col_sort_order = &(0..group_columns.len()).collect::>(); - super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - } + assert!(!self.group_key_sorted(group_columns)); // should always need a sort if in this method log::debug!("time checking sort {:?}", now.elapsed()); + // now sort on the first grouping columns. Right now the order doesn't matter... + let group_col_sort_order = &(0..group_columns.len()).collect::>(); + super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); + // let group_itrs = all_columns // .iter() // .take(group_columns.len()) // only use grouping columns @@ -582,7 +580,9 @@ impl Segment { .iter() .take(group_columns.len()) .map(|vector| match vector { - column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns + column::Vector::Unsigned32(_) => { + column::VectorIterator::new(vector) // encoded tag columns + } column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column _ => panic!("don't support grouping on non-encoded values or timestamps"), }) @@ -641,6 +641,7 @@ impl Segment { .iter() .map(|v| *v as usize) .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); // materialise all encoded values for the matching rows in the columns // we are grouping on and store each group as an iterator. 
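Note: the hunks above and below converge on the streaming variant of group-by:
once rows arrive sorted by group key, a running aggregate can be emitted each
time the key changes, with no hash table, and the time column can be bucketed
on the fly with the same `v / window * window` truncation used throughout. A
self-contained sketch of a windowed streaming count:

    fn stream_window_counts(sorted_ts: &[i64], window: i64) -> Vec<(i64, u64)> {
        let mut out: Vec<(i64, u64)> = Vec::new();
        for &ts in sorted_ts {
            let key = ts / window * window; // truncate to the window start
            if let Some((k, count)) = out.last_mut() {
                if *k == key {
                    *count += 1; // still in the same group/window
                    continue;
                }
            }
            out.push((key, 1)); // key changed (or first row): start a new group
        }
        out
    }

    // stream_window_counts(&[10, 20, 65, 70, 130], 60) == [(0, 2), (60, 2), (120, 1)]
    // (integer division truncates toward zero, so negative timestamps would
    // need separate handling)
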
@@ -663,7 +664,7 @@ impl Segment { } } - let mut group_itrs = group_column_encoded_values + let group_itrs = group_column_encoded_values .iter() .map(|vector| match vector { column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns From f0b371cd6e39ab7304f2130533a8a3e75fb2ed71 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 18 Sep 2020 10:33:01 +0100 Subject: [PATCH 61/73] feat: arrow buffers working --- delorean_mem_qe/src/bin/main.rs | 76 ++++++++++++++++----------------- delorean_mem_qe/src/encoding.rs | 56 ++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 42 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 2061b229da..0f7cd65d66 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -63,15 +63,15 @@ fn main() { ); let store = Arc::new(store); - // time_select_with_pred(&store); - // time_datafusion_select_with_pred(store.clone()); - // time_first_host(&store); - // time_sum_range(&store); - // time_count_range(&store); - // time_group_single_with_pred(&store); - // time_group_by_multi_agg_count(&store); - // time_group_by_multi_agg_sorted_count(&store); - time_window_agg_count(&store); + time_select_with_pred(&store); + time_datafusion_select_with_pred(store.clone()); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_sorted_count(&store); + // time_window_agg_count(&store); // time_group_by_different_columns(&store); } @@ -121,10 +121,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 364 { - i += 1; - continue; - } + // if i < 364 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -162,50 +162,50 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er if column.null_count() > 0 { panic!("null floats"); } - let arr = column - .as_any() - .downcast_ref::() - .unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); - segment.add_column(rb.schema().field(i).name(), column); + // let arr = column + // .as_any() + // .downcast_ref::() + // .unwrap(); + // let column = Column::from(arr.value_slice(0, rb.num_rows())); + // segment.add_column(rb.schema().field(i).name(), column); // TODO(edd): figure out how to get ownership here without // cloning - // let arr: array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); - // let column = Column::from(arr); - // segment.add_column(rb.schema().field(i).name(), column); + let arr: array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); + let column = Column::from(arr); + segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { if column.null_count() > 0 { panic!("null integers not expected in testing"); } - let arr = column.as_any().downcast_ref::().unwrap(); - let column = Column::from(arr.value_slice(0, rb.num_rows())); - segment.add_column(rb.schema().field(i).name(), column); + // let arr = column.as_any().downcast_ref::().unwrap(); + // let column = Column::from(arr.value_slice(0, rb.num_rows())); + // segment.add_column(rb.schema().field(i).name(), column); // TODO(edd): figure out how to get ownership here without // cloning - // let arr: array::Int64Array = 
arrow::array::PrimitiveArray::from(column.data());
-                    // let column = Column::from(arr);
-                    // segment.add_column(rb.schema().field(i).name(), column);
+                    let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data());
+                    let column = Column::from(arr);
+                    segment.add_column(rb.schema().field(i).name(), column);
                }
                datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => {
                    if column.null_count() > 0 {
                        panic!("null timestamps not expected in testing");
                    }
-                    let arr = column
-                        .as_any()
-                        .downcast_ref::<array::TimestampMicrosecondArray>()
-                        .unwrap();
-                    let column = Column::from(arr.value_slice(0, rb.num_rows()));
-                    segment.add_column(rb.schema().field(i).name(), column);
+                    // let arr = column
+                    //     .as_any()
+                    //     .downcast_ref::<array::TimestampMicrosecondArray>()
+                    //     .unwrap();
+                    // let column = Column::from(arr.value_slice(0, rb.num_rows()));
+                    // segment.add_column(rb.schema().field(i).name(), column);

                    // TODO(edd): figure out how to get ownership here without
                    // cloning
-                    // let arr: array::TimestampMicrosecondArray =
-                    //     arrow::array::PrimitiveArray::from(column.data());
-                    // let column = Column::from(arr);
-                    // segment.add_column(rb.schema().field(i).name(), column);
+                    let arr: array::TimestampMicrosecondArray =
+                        arrow::array::PrimitiveArray::from(column.data());
+                    let column = Column::from(arr);
+                    segment.add_column(rb.schema().field(i).name(), column);
                }
                datatypes::DataType::Utf8 => {
                    let arr = column

diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs
index ae9085e531..e99f70cbfa 100644
--- a/delorean_mem_qe/src/encoding.rs
+++ b/delorean_mem_qe/src/encoding.rs
@@ -23,7 +23,10 @@ pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug {
     fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64;
     fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64;

+    /// Returns the index of the first value equal to `v`.
     fn row_id_eq_value(&self, v: Self::Item) -> Option<usize>;
+
+    /// Returns the index of the first value greater than or equal to `v`.
     fn row_id_ge_value(&self, v: Self::Item) -> Option<usize>;

     fn row_ids_single_cmp_roaring(
@@ -186,11 +189,25 @@ where
     }

     fn row_id_eq_value(&self, v: Self::Item) -> Option<usize> {
-        todo!()
+        for i in 0..self.arr.len() {
+            if self.arr.is_null(i) {
+                continue;
+            } else if self.arr.value(i) == v {
+                return Some(i);
+            }
+        }
+        None
     }

     fn row_id_ge_value(&self, v: Self::Item) -> Option<usize> {
-        todo!()
+        for i in 0..self.arr.len() {
+            if self.arr.is_null(i) {
+                continue;
+            } else if self.arr.value(i) >= v {
+                return Some(i);
+            }
+        }
+        None
     }

     fn row_ids_single_cmp_roaring(
@@ -202,7 +219,37 @@ where
     }

     fn row_ids_gte_lt_roaring(&self, from: &Self::Item, to: &Self::Item) -> croaring::Bitmap {
-        todo!()
+        let mut bm = croaring::Bitmap::create();
+
+        let mut found = false;
+        let mut count = 0;
+        for i in 0..self.arr.len() {
+            let next = &self.arr.value(i);
+            if (self.arr.is_null(i) || next < from || next >= to) && found {
+                let (min, max) = (i as u64 - count as u64, i as u64);
+                bm.add_range(min..max);
+                found = false;
+                count = 0;
+                continue;
+            } else if self.arr.is_null(i) || next < from || next >= to {
+                continue;
+            }
+
+            if !found {
+                found = true;
+            }
+            count += 1;
+        }
+
+        // add any remaining range.
+        if found {
+            let (min, max) = (
+                (self.arr.len()) as u64 - count as u64,
+                (self.arr.len()) as u64,
+            );
+            bm.add_range(min..max);
+        }
+        bm
     }
 }

@@ -528,7 +576,7 @@ impl DictionaryRLE {
                 .index_row_ids
                 .insert(next_idx as u32, croaring::Bitmap::create());

-            _self.run_lengths.push((next_idx, 0)); // could this cause a bug?
+            _self.run_lengths.push((next_idx, 0)); // could this cause a bug?
        }
        _self
    }

From e3d805e997a662c30336f8186ee7be2a72871f78 Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Tue, 22 Sep 2020 11:37:09 +0100
Subject: [PATCH 62/73] feat: tag_keys implementation

---
 delorean_mem_qe/src/adapter.rs  |   2 +-
 delorean_mem_qe/src/bin/main.rs | 112 ++++++++++++++--------
 delorean_mem_qe/src/column.rs   |  18 +++-
 delorean_mem_qe/src/encoding.rs | 102 +++++++++++++++++++-
 delorean_mem_qe/src/segment.rs  | 165 ++++++++++++++++++++++++++------
 5 files changed, 323 insertions(+), 76 deletions(-)

diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs
index a0fdab6af6..c2537fa53c 100644
--- a/delorean_mem_qe/src/adapter.rs
+++ b/delorean_mem_qe/src/adapter.rs
@@ -309,7 +309,7 @@ impl Partition for SegmentPartition {
        // Here
        let _columns = segments.read_filter_eq(
            self.time_range,
-            &[(col_name, Some(&scalar))],
+            &[(col_name, Some(scalar))],
            vec![
                "env".to_string(),
                "method".to_string(),

diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs
index 0f7cd65d66..e6251c0dd4 100644
--- a/delorean_mem_qe/src/bin/main.rs
+++ b/delorean_mem_qe/src/bin/main.rs
@@ -13,7 +13,7 @@ use arrow::{array, array::Array, datatypes, ipc};

 use delorean_mem_qe::column;
 use delorean_mem_qe::column::{AggregateType, Column};
-use delorean_mem_qe::segment::{GroupingStrategy, Schema, Segment};
+use delorean_mem_qe::segment::{ColumnType, GroupingStrategy, Schema, Segment};
 use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store};
 use parquet::arrow::arrow_reader::ArrowReader;

@@ -63,15 +63,16 @@ fn main() {
    );
    let store = Arc::new(store);

-    time_select_with_pred(&store);
-    time_datafusion_select_with_pred(store.clone());
-    time_first_host(&store);
-    time_sum_range(&store);
-    time_count_range(&store);
-    time_group_single_with_pred(&store);
-    time_group_by_multi_agg_count(&store);
-    time_group_by_multi_agg_sorted_count(&store);
+    // time_select_with_pred(&store);
+    // time_datafusion_select_with_pred(store.clone());
+    // time_first_host(&store);
+    // time_sum_range(&store);
+    // time_count_range(&store);
+    // time_group_single_with_pred(&store);
+    // time_group_by_multi_agg_count(&store);
+    // time_group_by_multi_agg_sorted_count(&store);
    // time_window_agg_count(&store);
+    time_tag_keys_with_pred(&store);
    // time_group_by_different_columns(&store);
 }

@@ -121,10 +122,10 @@ fn build_store(
        match rb {
            Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e),
            Ok(Some(rb)) => {
-                // if i < 364 {
-                //     i += 1;
-                //     continue;
-                // }
+                if i < 360 {
+                    i += 1;
+                    continue;
+                }
                let schema = Schema::with_sort_order(
                    rb.schema(),
                    sort_order.iter().map(|s| s.to_string()).collect(),
@@ -162,50 +163,50 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er
                if column.null_count() > 0 {
                    panic!("null floats");
                }
-                // let arr = column
-                //     .as_any()
-                //     .downcast_ref::<array::Float64Array>()
-                //     .unwrap();
-                // let column = Column::from(arr.value_slice(0, rb.num_rows()));
-                // segment.add_column(rb.schema().field(i).name(), column);
+                let arr = column
+                    .as_any()
+                    .downcast_ref::<array::Float64Array>()
+                    .unwrap();
+                let column = Column::from(arr.value_slice(0, rb.num_rows()));
+                segment.add_column(rb.schema().field(i).name(), ColumnType::Field(column));

                // TODO(edd): figure out how to get ownership here without
                // cloning
                // let arr:
array::Float64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Int64 => { if column.null_count() > 0 { panic!("null integers not expected in testing"); } - // let arr = column.as_any().downcast_ref::().unwrap(); - // let column = Column::from(arr.value_slice(0, rb.num_rows())); - // segment.add_column(rb.schema().field(i).name(), column); + let arr = column.as_any().downcast_ref::().unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), ColumnType::Time(column)); // TODO(edd): figure out how to get ownership here without // cloning - let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data()); - let column = Column::from(arr); - segment.add_column(rb.schema().field(i).name(), column); + // let arr: array::Int64Array = arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Timestamp(TimeUnit::Microsecond, None) => { if column.null_count() > 0 { panic!("null timestamps not expected in testing"); } - // let arr = column - // .as_any() - // .downcast_ref::() - // .unwrap(); - // let column = Column::from(arr.value_slice(0, rb.num_rows())); - // segment.add_column(rb.schema().field(i).name(), column); + let arr = column + .as_any() + .downcast_ref::() + .unwrap(); + let column = Column::from(arr.value_slice(0, rb.num_rows())); + segment.add_column(rb.schema().field(i).name(), ColumnType::Time(column)); // TODO(edd): figure out how to get ownership here without // cloning - let arr: array::TimestampMicrosecondArray = - arrow::array::PrimitiveArray::from(column.data()); - let column = Column::from(arr); - segment.add_column(rb.schema().field(i).name(), column); + // let arr: array::TimestampMicrosecondArray = + // arrow::array::PrimitiveArray::from(column.data()); + // let column = Column::from(arr); + // segment.add_column(rb.schema().field(i).name(), column); } datatypes::DataType::Utf8 => { let arr = column @@ -266,7 +267,10 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er None => c.add_additional(None, count), } - segment.add_column(rb.schema().field(i).name(), Column::String(c)); + segment.add_column( + rb.schema().field(i).name(), + ColumnType::Tag(Column::String(c)), + ); } datatypes::DataType::Boolean => { panic!("unsupported"); @@ -383,7 +387,7 @@ fn time_select_with_pred(store: &Store) { let columns = segments.read_filter_eq( (1590036110000000, 1590040770000000), - &[("env", Some(&column::Scalar::String("prod01-eu-central-1")))], + &[("env", Some(column::Scalar::String("prod01-eu-central-1")))], vec![ "env".to_string(), "method".to_string(), @@ -605,6 +609,34 @@ fn time_window_agg_count(store: &Store) { } } +// +// SHOW TAG KEYS WHERE time >= x and time < y AND "env" = 'prod01-eu-central-1' +fn time_tag_keys_with_pred(store: &Store) { + let repeat = 1000000; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let columns = segments.tag_keys( + (1588834080000000, 1590044410000000), + &[("env", "prod01-eu-central-1"), ("method", "GET")], + ); + + total_time += now.elapsed(); + track += columns.len(); + // println!("{:?}", columns); + } + println!( + "time_tag_keys_with_pred ran 
{:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + // This is for a performance experiment where I wanted to show the performance // change as more columns are grouped on. // diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 61081d2de2..808f0e7415 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1123,16 +1123,16 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - pub fn row_ids_eq(&self, value: Option<&Scalar<'_>>) -> Option { + pub fn row_ids_eq(&self, value: Option>) -> Option { let value = match value { Some(v) => v, None => return None, }; - if !self.maybe_contains(value) { + if !self.maybe_contains(&value) { return None; } - self.row_ids(value, std::cmp::Ordering::Equal) + self.row_ids(&value, std::cmp::Ordering::Equal) } pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { @@ -1249,6 +1249,18 @@ impl Column { } } } + + // great catchy name... This determines as efficiently as possible if the + // column contains a non-null value in at least one of the provided row + // ids. + // + // row_ids *must* be in ascending order. + pub fn has_non_null_value_in_row_ids(&self, row_ids: &[usize]) -> bool { + match self { + Column::String(c) => c.data.has_non_null_value_in_row_ids(row_ids), + _ => unreachable!("not supported at the moment"), + } + } } impl std::fmt::Display for Column { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index e99f70cbfa..129fb68fab 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -107,11 +107,11 @@ where /// /// encoded_values should not be called on nullable columns. fn encoded_values(&self, row_ids: &[usize]) -> Vec { - panic!("encoded_values not implemented yet"); + todo!(); } fn all_encoded_values(&self) -> Vec { - panic!("all_encoded_values not implemented yet"); + todo!(); } // TODO(edd): problem here is returning a slice because we need to own the @@ -774,6 +774,58 @@ impl DictionaryRLE { out } + pub fn has_non_null_value_in_row_ids(&self, row_ids: &[usize]) -> bool { + let null_encoded_value = self.entry_index.get(&None); + if null_encoded_value.is_none() { + // there are no NULL entries in this encoded column so return true + // as soon a row_id is found that's < the number of rows encoded in + // the column. + for &id in row_ids { + if (id as u64) < self.total { + return true; + } + } + return false; + } + let null_encoded_value = *null_encoded_value.unwrap(); + + // Return true if there exists an encoded value at any of the row ids + // that is not equal to `null_encoded_value`. In such a case the column + // contains a non-NULL value at one of the row ids. + let mut curr_logical_row_id = 0; + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_encoded_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + for &row_id in row_ids { + if (row_id as u64) >= self.total { + continue; // can't possibly have a value at this row id. + } + + while curr_logical_row_id + curr_entry_rl <= row_id as u64 { + // this encoded entry does not cover the row we need. + // move on to next encoded id + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_encoded_id = res.0; + curr_entry_rl = res.1; + } + // TODO(edd): deal with this properly. + None => panic!("shouldn't get here"), + } + } + + // this entry covers the row_id we want. 
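+            // (A run holding any dictionary id other than the NULL entry's id
+            // proves a real value exists at this row, so we can return early
+            // without materialising anything.)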
+ if curr_encoded_id != null_encoded_value { + return true; + } + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + false + } + /// Return the decoded value for an encoded ID. /// /// Panics if there is no decoded value for the provided id @@ -1094,6 +1146,52 @@ mod test { assert_eq!(results, exp); } + #[test] + fn dict_rle_has_value_no_null() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west, 3); + drle.push_additional(east, 2); + drle.push_additional(north, 4); + + // w,w,w,e,e,n,n,n,n + // 0 1 2 3 4 5 6 7 8 + assert_eq!(drle.has_non_null_value_in_row_ids(&[0]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[1, 3]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[8]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[12, 132]), false); + } + + #[test] + fn dict_rle_has_value() { + let mut drle = super::DictionaryRLE::new(); + let west = Some("west".to_string()); + let east = Some("east".to_string()); + let north = Some("north".to_string()); + drle.push_additional(west.clone(), 3); + drle.push_additional(None, 1); + drle.push_additional(east, 2); + drle.push_additional(north, 4); + drle.push_additional(None, 4); + drle.push_additional(west, 3); + + // w,w,w,?,e,e,n,n,n,n, ?, ?, ?, ?, w, w, w + // 0 1 2 3 4 5 6 7 8 9 10 11, 12, 13, 14, 15, 16 + assert_eq!(drle.has_non_null_value_in_row_ids(&[0]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[2, 3]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[2, 3]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[3, 4, 10]), true); + assert_eq!(drle.has_non_null_value_in_row_ids(&[16, 19]), true); + + assert_eq!(drle.has_non_null_value_in_row_ids(&[3]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[3, 10]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[17]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[17, 19]), false); + assert_eq!(drle.has_non_null_value_in_row_ids(&[12, 19]), false); + } + #[test] fn dict_rle_values() { let mut drle = super::DictionaryRLE::new(); diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 99cf527d0b..6d3974434e 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet}; use super::column; use super::column::{AggregateType, Column}; @@ -7,6 +7,23 @@ use arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. const THREADS: usize = 16; +/// ColumnType describes the logical type a column can have. 
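+/// Tag columns hold dictionary-encoded string values, field columns hold
+/// measured (field) values, and the time column holds timestamps.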
+pub enum ColumnType { + Tag(column::Column), + Field(column::Column), + Time(column::Column), +} + +impl ColumnType { + fn num_rows(&self) -> usize { + match &self { + ColumnType::Tag(c) => c.num_rows(), + ColumnType::Field(c) => c.num_rows(), + ColumnType::Time(c) => c.num_rows(), + } + } +} + #[derive(Debug)] pub struct Schema { _ref: SchemaRef, @@ -51,7 +68,9 @@ pub struct Segment { meta: SegmentMetaData, // Columns within a segment - columns: Vec, + columns: Vec, + + tag_column_idxs: Vec, // todo(edd): add vectors to each type time_column_idx: usize, } @@ -61,35 +80,46 @@ impl Segment { Self { meta: SegmentMetaData::new(rows, schema), columns: Vec::with_capacity(cols), + tag_column_idxs: vec![], time_column_idx: 0, } } - pub fn add_column(&mut self, name: &str, c: column::Column) { + pub fn add_column(&mut self, name: &str, ct: ColumnType) { assert_eq!( self.meta.rows, - c.num_rows(), + ct.num_rows(), "Column {:?} has {:?} rows but wanted {:?}", name, - c.num_rows(), + ct.num_rows(), self.meta.rows ); - // TODO(edd) yuk - if name == "time" { - if let column::Column::Integer(ts) = &c { - // Right now assumption is ts column has some non-null values - self.meta.time_range = ts.column_range().unwrap(); - } else { - panic!("incorrect column type for time"); - } - self.time_column_idx = self.columns.len(); - } - // validate column doesn't already exist in segment assert!(!self.meta.column_names.contains(&name.to_owned())); self.meta.column_names.push(name.to_owned()); - self.columns.push(c); + + match ct { + ColumnType::Time(c) => { + assert_eq!(name, "time"); + + if let Column::Integer(ts) = &c { + // Right now assumption is ts column has some non-null values + self.meta.time_range = ts.column_range().unwrap(); + } else { + panic!("incorrect column type for time"); + } + self.time_column_idx = self.columns.len(); + self.columns.push(c); + } + ColumnType::Tag(c) => { + self.tag_column_idxs.push(self.columns.len()); + self.columns.push(c); + } + ColumnType::Field(c) => { + self.columns.push(c); + } + } } pub fn num_rows(&self) -> usize { @@ -101,7 +131,7 @@ impl Segment { } /// column returns the column with name - pub fn column(&self, name: &str) -> Option<&column::Column> { + pub fn column(&self, name: &str) -> Option<&Column> { if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { return self.columns.get(*id); } @@ -225,7 +255,7 @@ impl Segment { pub fn aggregate_by_group_with_hash<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, @@ -464,7 +494,7 @@ impl Segment { pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -609,7 +639,7 @@ impl Segment { pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -859,7 +889,7 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range @@ -877,7 +907,7 @@ impl Segment { fn 
filter_by_predicates_eq_time( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option<&column::Scalar<'_>>)>, + predicates: Vec<(&str, Option>)>, ) -> Option { // Get all row_ids matching the time range: // @@ -915,7 +945,7 @@ impl Segment { // meta row_ids bitmap. fn filter_by_predicates_eq_no_time( &self, - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], ) -> Option { if predicates.is_empty() { // In this case there are no predicates provided and we have no time @@ -930,7 +960,7 @@ impl Segment { for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { // TODO(edd): rework this clone - match c.row_ids_eq(*col_pred_value) { + match c.row_ids_eq(col_pred_value.clone()) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -959,7 +989,7 @@ impl Segment { pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], group_column: &String, aggregates: &Vec<(String, column::AggregateType)>, ) -> BTreeMap)>> { @@ -1021,6 +1051,59 @@ impl Segment { } grouped_results } + + pub fn tag_keys( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + ) -> BTreeSet { + let (seg_min, seg_max) = self.meta.time_range; + if predicates.is_empty() && time_range.0 <= seg_min && time_range.1 > seg_max { + // the segment is completely overlapped by the time range of query, + // and there are no predicates + todo!("fast path") + } + + let pred_vec = predicates + .iter() + .map(|p| (p.0, Some(column::Scalar::String(p.1)))) + .collect::>(); + + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + filtered_row_ids = row_ids; + } else { + return BTreeSet::new(); // no matching rows for predicate + time range + } + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); + let mut results = BTreeSet::new(); + + // any columns that are in predicate set using equality predicates should + // be automatically included in results. + // + // TODO(edd): when predicates get more complicated it's likely this + // assumption will be a hard one to make. + for (col, _) in predicates { + results.insert(String::from(*col)); + } + + // now check if any of the other tag columns have a non-null value for + // any of the filtered ids. 
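+        // (This is the expensive path: each remaining tag column is probed
+        // with the materialised row ids, short-circuiting on the first
+        // non-null value found.)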
+ for &i in &self.tag_column_idxs { + let col = &self.columns[i]; + if col.has_non_null_value_in_row_ids(&filtered_row_ids_vec) { + results.insert(self.column_names().get(i).unwrap().clone()); + } + } + + results + } } impl std::fmt::Display for Segment { @@ -1111,7 +1194,7 @@ impl<'a> Segments<'a> { pub fn read_filter_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'_>>)], + predicates: &[(&str, Option>)], select_columns: Vec, ) -> BTreeMap> { let (min, max) = time_range; @@ -1147,7 +1230,7 @@ impl<'a> Segments<'a> { pub fn read_group_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, @@ -1197,7 +1280,7 @@ impl<'a> Segments<'a> { fn read_group_eq_hash( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, @@ -1283,7 +1366,7 @@ impl<'a> Segments<'a> { fn read_group_eq_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option<&column::Scalar<'a>>)], + predicates: &[(&str, Option>)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, @@ -1524,6 +1607,28 @@ impl<'a> Segments<'a> { panic!("time column wrong type!"); } } + + pub fn tag_keys( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + ) -> BTreeSet { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + let mut columns = BTreeSet::new(); + + for segment in &self.segments { + if !segment.meta.overlaps_time_range(min, max) { + continue; // segment doesn't have time range + } + columns.append(&mut segment.tag_keys(time_range, predicates)); + } + + columns + } } #[derive(Debug)] From 3963cf6cce59ea03451dd892b958f81ef0376671 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Tue, 22 Sep 2020 14:57:44 +0100 Subject: [PATCH 63/73] perf: improve performance of tag keys --- delorean_mem_qe/src/bin/main.rs | 10 ++++---- delorean_mem_qe/src/column.rs | 6 ++--- delorean_mem_qe/src/segment.rs | 41 ++++++++++++++++++++++++++------- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index e6251c0dd4..b1b5768a0e 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -122,10 +122,10 @@ fn build_store( match rb { Err(e) => println!("WARNING: error reading batch: {:?}, SKIPPING", e), Ok(Some(rb)) => { - if i < 360 { - i += 1; - continue; - } + // if i < 360 { + // i += 1; + // continue; + // } let schema = Schema::with_sort_order( rb.schema(), sort_order.iter().map(|s| s.to_string()).collect(), @@ -612,7 +612,7 @@ fn time_window_agg_count(store: &Store) { // // SHOW TAG KEYS WHERE time >= x and time < y AND "env" = 'prod01-eu-central-1' fn time_tag_keys_with_pred(store: &Store) { - let repeat = 1000000; + let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); let mut track = 0; let segments = store.segments(); diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 808f0e7415..15121f1070 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1123,16 +1123,16 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... 
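    // (The change below switches `row_ids_eq` to borrow its scalar, so a
    // caller can reuse one `Option<Scalar<'_>>` across segments instead of
    // cloning it for every call.)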
- pub fn row_ids_eq(&self, value: Option>) -> Option { + pub fn row_ids_eq(&self, value: &Option>) -> Option { let value = match value { Some(v) => v, None => return None, }; - if !self.maybe_contains(&value) { + if !self.maybe_contains(value) { return None; } - self.row_ids(&value, std::cmp::Ordering::Equal) + self.row_ids(value, std::cmp::Ordering::Equal) } pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 6d3974434e..0ef941ad8f 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -916,11 +916,12 @@ impl Segment { &column::Scalar::Integer(time_range.0), &column::Scalar::Integer(time_range.1), )?; + log::debug!("time col bitmap contains {:?} values out of {:?} rows. requested range was {:?}, meta range is {:?}",bm.cardinality(),self.num_rows(), time_range, self.meta.time_range); // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(col_pred_value) { + match c.row_ids_eq(&col_pred_value) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -959,8 +960,7 @@ impl Segment { // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - // TODO(edd): rework this clone - match c.row_ids_eq(col_pred_value.clone()) { + match c.row_ids_eq(col_pred_value) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -1056,7 +1056,23 @@ impl Segment { &self, time_range: (i64, i64), predicates: &[(&str, &str)], - ) -> BTreeSet { + exclude_columns: &BTreeSet, + ) -> Option> { + // first check if we have any columns not in the exclusion set. + let mut all_excluded = true; + for &i in &self.tag_column_idxs { + let col_name = self.column_names().get(i).unwrap(); + if !exclude_columns.contains(col_name) { + all_excluded = false; + break; + } + } + + if all_excluded { + log::debug!("skipping segment as all tag columns excluded"); + return None; // we don't have any tag columns to offer. + } + let (seg_min, seg_max) = self.meta.time_range; if predicates.is_empty() && time_range.0 <= seg_min && time_range.1 > seg_max { // the segment is completely overlapped by the time range of query, @@ -1073,7 +1089,7 @@ impl Segment { if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { filtered_row_ids = row_ids; } else { - return BTreeSet::new(); // no matching rows for predicate + time range + return None; // no matching rows for predicate + time range } let filtered_row_ids_vec = filtered_row_ids @@ -1097,12 +1113,18 @@ impl Segment { // any of the filtered ids. 
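        // (Tag columns the caller already has are skipped below via the
        // `exclude_columns` check, so later segments avoid re-probing them.)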
for &i in &self.tag_column_idxs { let col = &self.columns[i]; + let col_name = self.column_names().get(i).unwrap(); + + if exclude_columns.contains(col_name) { + continue; + } + if col.has_non_null_value_in_row_ids(&filtered_row_ids_vec) { - results.insert(self.column_names().get(i).unwrap().clone()); + results.insert(col_name.clone()); } } - results + Some(results) } } @@ -1624,7 +1646,10 @@ impl<'a> Segments<'a> { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range } - columns.append(&mut segment.tag_keys(time_range, predicates)); + let segment_columns = segment.tag_keys(time_range, predicates, &columns); + if let Some(mut result) = segment_columns { + columns.append(&mut result); + } } columns From d0f3cae9b317c71e1aacd08378333a72ffa1e99e Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 24 Sep 2020 14:30:27 +0100 Subject: [PATCH 64/73] feat: add tag values schema API --- Cargo.lock | 52 +++++------- delorean_mem_qe/src/bin/main.rs | 33 +++++++- delorean_mem_qe/src/column.rs | 40 +++++++++ delorean_mem_qe/src/encoding.rs | 78 +++++++++++++++++- delorean_mem_qe/src/segment.rs | 139 ++++++++++++++++++++++++++++++++ 5 files changed, 307 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63e340c752..7d7c6a3a41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,11 +87,7 @@ checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" -<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" -======= -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" ->>>>>>> 27b73c4... refactor: add encoding trait dependencies = [ "chrono", "csv", @@ -111,7 +107,7 @@ dependencies = [ [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" dependencies = [ "chrono", "csv", @@ -669,27 +665,7 @@ dependencies = [ [[package]] name = "datafusion" version = "2.0.0-SNAPSHOT" -<<<<<<< HEAD source = "git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d#171e8bfe5fe13467a1763227e495fae6bc5d011d" -======= -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "clap", - "crossbeam", - "fnv", - "num_cpus", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "paste", - "rustyline", - "sqlparser", -] - -[[package]] -name = "datafusion" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" ->>>>>>> 27b73c4... 
refactor: add encoding trait dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "chrono", @@ -703,6 +679,22 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion" +version = "2.0.0-SNAPSHOT" +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" +dependencies = [ + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "clap", + "crossbeam", + "fnv", + "num_cpus", + "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "paste", + "rustyline", + "sqlparser", +] + [[package]] name = "delorean" version = "0.1.0" @@ -713,11 +705,7 @@ dependencies = [ "clap", "criterion", "csv", -<<<<<<< HEAD "delorean_arrow", -======= - "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", ->>>>>>> 27b73c4... refactor: add encoding trait "delorean_generated_types", "delorean_ingest", "delorean_line_parser", @@ -763,7 +751,7 @@ name = "delorean_arrow" version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "datafusion", + "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", ] @@ -2111,9 +2099,9 @@ dependencies = [ [[package]] name = "parquet" version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4#62dfa114d6683172927fab40fa6c4ddabae8fef4" +source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=62dfa114d6683172927fab40fa6c4ddabae8fef4)", + "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "brotli", "byteorder", "chrono", diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index b1b5768a0e..659e1028e9 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -72,7 +72,8 @@ fn main() { // time_group_by_multi_agg_count(&store); // time_group_by_multi_agg_sorted_count(&store); // time_window_agg_count(&store); - time_tag_keys_with_pred(&store); + // time_tag_keys_with_pred(&store); + time_tag_values_with_pred(&store); // time_group_by_different_columns(&store); } @@ -621,7 +622,7 @@ fn time_tag_keys_with_pred(store: &Store) { let columns = segments.tag_keys( (1588834080000000, 1590044410000000), - &[("env", "prod01-eu-central-1"), ("method", "GET")], + &[("env", "prod01-eu-central-1")], ); total_time += now.elapsed(); @@ -637,6 +638,34 @@ fn time_tag_keys_with_pred(store: &Store) { ); } +// +// SHOW TAG VALUES ON "host", "method" WHERE time >= x and time < y AND "env" = 'prod01-us-west-1' +fn time_tag_values_with_pred(store: &Store) { + let repeat = 10; + let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); + let mut track = 0; + let segments = store.segments(); + for _ in 0..repeat { + let now = std::time::Instant::now(); + + let tag_values = segments.tag_values( + (1588834080000000, 1590044410000000), + &[("env", "prod01-us-west-2")], + 
&["host".to_string(), "method".to_string()], + ); + + total_time += now.elapsed(); + track += tag_values.len(); + } + println!( + "time_tag_values_with_pred ran {:?} in {:?} {:?} / call {:?}", + repeat, + total_time, + total_time / repeat, + track + ); +} + // This is for a performance experiment where I wanted to show the performance // change as more columns are grouped on. // diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 15121f1070..ee63e51201 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1,7 +1,14 @@ +use std::collections::BTreeSet; use std::convert::From; use super::encoding; +#[derive(Debug)] +pub enum Set<'a> { + String(BTreeSet<&'a std::string::String>), + Integer(BTreeSet), +} + #[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Value<'a> { Null, @@ -1261,6 +1268,39 @@ impl Column { _ => unreachable!("not supported at the moment"), } } + + /// This returns the distinct set of values in the column from the set of + /// rows provided. + /// + /// NULL values are not included in the returned set even if present in the + /// column at provided rows. + /// + /// row_ids *must* be in ascending order. + pub fn distinct_values(&self, row_ids: &[usize]) -> Set<'_> { + match self { + Column::String(c) => Set::String(c.data.distinct_values(row_ids)), + _ => unreachable!("not supported at the moment"), + } + } + + /// Returns true if the column contains any values other than those in + /// `values`. + pub fn contains_other_values(&self, values: &BTreeSet<&std::string::String>) -> bool { + match self { + Column::String(c) => { + // TODO(edd): + // had problems with ref inside of enum Set variant. + + // if let Set::String(v) = values { + c.data.contains_other_values(values) + // } else { + // panic!("incompatible set with column type"); + // } + // Set::String(c.data.distinct_values(row_ids)) + } + _ => unreachable!("not supported at the moment"), + } + } } impl std::fmt::Display for Column { diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 129fb68fab..09467bea63 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::iter; use std::mem::size_of; @@ -774,6 +774,82 @@ impl DictionaryRLE { out } + /// Returns the unique set of values encoded at each of the provided ids. + /// NULL values are not returned. + pub fn distinct_values(&self, row_ids: &[usize]) -> BTreeSet<&String> { + // TODO(edd): can improve on this if we know encoded data is totally + // ordered. + let mut encoded_values = HashSet::new(); + + let mut curr_logical_row_id = 0; + let mut run_lengths_iter = self.run_lengths.iter(); + let (mut curr_entry_id, mut curr_entry_rl) = run_lengths_iter.next().unwrap(); + + 'by_row: for row_id in row_ids { + while curr_logical_row_id + curr_entry_rl <= *row_id as u64 { + // this encoded entry does not cover the row we need. 
+ // move on to next entry + curr_logical_row_id += curr_entry_rl; + match run_lengths_iter.next() { + Some(res) => { + curr_entry_id = res.0; + curr_entry_rl = res.1; + } + None => panic!("shouldn't get here"), + } + } + + // track encoded value + encoded_values.insert(curr_entry_id); + if encoded_values.len() == self.index_entry.len() { + // all distinct values have been read + break 'by_row; + } + + curr_logical_row_id += 1; + curr_entry_rl -= 1; + } + + assert!(encoded_values.len() <= self.index_entry.len()); + + // Finally, materialise the decoded values for the encoded set. + let mut results = BTreeSet::new(); + for id in encoded_values.iter() { + let decoded_value = self.index_entry.get(id).unwrap(); + if let Some(value) = decoded_value { + results.insert(value); + } + } + results + } + + /// Returns true if the encoding contains values other than those provided in + /// `values`. + pub fn contains_other_values(&self, values: &BTreeSet<&String>) -> bool { + let mut encoded_values = self.entry_index.len(); + if self.entry_index.contains_key(&None) { + encoded_values -= 1; + } + + if encoded_values > values.len() { + return true; + } + + for key in self.entry_index.keys() { + match key { + Some(key) => { + if !values.contains(key) { + return true; + } + } + None => continue, // skip NULL + } + } + false + } + + /// Determines if the encoded data contains at least one non-null value at + /// any of the provided row ids. pub fn has_non_null_value_in_row_ids(&self, row_ids: &[usize]) -> bool { let null_encoded_value = self.entry_index.get(&None); if null_encoded_value.is_none() { diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 0ef941ad8f..fdde84f430 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -130,6 +130,11 @@ impl Segment { &self.meta.column_names } + /// Determines if the segment contains a column with the provided name. + pub fn has_column(&self, name: &String) -> bool { + self.meta.column_names.contains(name) + } + /// column returns the column with name pub fn column(&self, name: &str) -> Option<&Column> { if let Some(id) = &self.meta.column_names.iter().position(|c| c == name) { @@ -1126,6 +1131,89 @@ impl Segment { Some(results) } + + pub fn tag_values( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + tag_keys: &[String], + excluded_tag_values: &BTreeMap>, + ) -> Option>> { + // first check if we have any columns that should be processed. + let mut have_some_cols = false; + for &i in &self.tag_column_idxs { + let col_name = self.column_names().get(i).unwrap(); + if tag_keys.contains(col_name) { + have_some_cols = true; + break; + } + } + + if !have_some_cols { + log::debug!("skipping segment because no columns for tag keys present"); + return None; // we don't have any tag columns to offer. 
+ } + + let (seg_min, seg_max) = self.meta.time_range; + if predicates.is_empty() && time_range.0 <= seg_min && time_range.1 > seg_max { + // the segment is completely overlapped by the time range of query, + // and there are no predicates + todo!("fast path") + } + + let pred_vec = predicates + .iter() + .map(|p| (p.0, Some(column::Scalar::String(p.1)))) + .collect::>(); + + let filtered_row_ids: croaring::Bitmap; + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + filtered_row_ids = row_ids; + } else { + return None; // no matching rows for predicate + time range + } + + let mut results = BTreeMap::new(); + + let filtered_row_ids_vec = filtered_row_ids + .to_vec() + .iter() + .map(|v| *v as usize) + .collect::>(); + log::debug!("filtered to {:?} rows.", filtered_row_ids_vec.len()); + + for &i in &self.tag_column_idxs { + let col = &self.columns[i]; + let col_name = self.column_names().get(i).unwrap(); + + if !tag_keys.contains(col_name) { + continue; + } + + // if !col.contains_other_values(&column::Set::String( + // *excluded_tag_values.get(col_name).unwrap(), + // )) { + // log::debug!("skipping!!"); + // continue; + // } + + if let Some(exclude_tag_values) = excluded_tag_values.get(col_name) { + if !col.contains_other_values(exclude_tag_values) { + log::debug!("skipping!!"); + continue; + } + } + + if let column::Set::String(values) = col.distinct_values(&filtered_row_ids_vec) { + log::debug!("distinct values: {:?}", values); + results.insert(col_name, values); + } else { + unreachable!("only works on tag columns"); + } + } + + Some(results) + } } impl std::fmt::Display for Segment { @@ -1630,6 +1718,8 @@ impl<'a> Segments<'a> { } } + /// Returns the distinct set of tag keys (column names) matching the provided + /// predicates and time range. pub fn tag_keys( &self, time_range: (i64, i64), @@ -1654,6 +1744,55 @@ impl<'a> Segments<'a> { columns } + + /// Returns the distinct set of tag values (column values) for each provided + /// tag key, where each returned value lives in a row matching the provided + /// predicates and time range. + /// + /// As a special case, if no values are provided for `tag_keys` then all + /// tag key-values are returned for the segments. 
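+    ///
+    /// A sketch of a call, mirroring the `time_tag_values_with_pred`
+    /// benchmark in this patch (values illustrative):
+    ///
+    ///     let values = segments.tag_values(
+    ///         (1588834080000000, 1590044410000000),
+    ///         &[("env", "prod01-us-west-2")],
+    ///         &["host".to_string(), "method".to_string()],
+    ///     );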
+ pub fn tag_values( + &self, + time_range: (i64, i64), + predicates: &[(&str, &str)], + tag_keys: &[String], + ) -> BTreeMap> { + let (min, max) = time_range; + if max <= min { + panic!("max <= min"); + } + + let mut results: BTreeMap> = BTreeMap::new(); + + for segment in &self.segments { + if !segment.meta.overlaps_time_range(min, max) { + continue; // segment doesn't have time range + } + + let col_names = if tag_keys.is_empty() { + segment.column_names() + } else { + tag_keys + }; + + let segment_values = segment.tag_values(time_range, predicates, col_names, &results); + match segment_values { + Some(values) => { + for (tag_key, mut tag_values) in values { + if !results.contains_key(tag_key) { + results.insert(tag_key.clone(), tag_values); + } else { + let all_values = results.get_mut(tag_key).unwrap(); + all_values.append(&mut tag_values); + } + } + } + None => continue, + } + } + + results + } } #[derive(Debug)] From c42d2dcd794d2006a7cc17f6d36cd2d33173a3d1 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 24 Sep 2020 15:41:48 +0100 Subject: [PATCH 65/73] refactor: rebase with delorean_arrow --- Cargo.lock | 73 +--- Cargo.toml | 2 +- delorean_mem_qe/Cargo.toml | 4 +- delorean_mem_qe/src/adapter.rs | 575 ++++++++++++++++---------------- delorean_mem_qe/src/bin/main.rs | 87 +++-- delorean_mem_qe/src/column.rs | 17 +- delorean_mem_qe/src/encoding.rs | 5 +- delorean_mem_qe/src/lib.rs | 2 +- delorean_mem_qe/src/segment.rs | 6 +- 9 files changed, 355 insertions(+), 416 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7d7c6a3a41..a6f96b0298 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -104,26 +104,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "arrow" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "chrono", - "csv", - "flatbuffers", - "hex", - "indexmap", - "lazy_static", - "num 0.3.0", - "prettytable-rs", - "rand", - "regex", - "serde", - "serde_derive", - "serde_json", -] - [[package]] name = "arrow" version = "2.0.0-SNAPSHOT" @@ -673,23 +653,7 @@ dependencies = [ "crossbeam", "fnv", "num_cpus", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "paste", - "rustyline", - "sqlparser", -] - -[[package]] -name = "datafusion" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "clap", - "crossbeam", - "fnv", - "num_cpus", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "parquet", "paste", "rustyline", "sqlparser", @@ -751,8 +715,8 @@ name = "delorean_arrow" version = "0.1.0" dependencies = [ "arrow 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "datafusion 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/apache/arrow.git?rev=171e8bfe5fe13467a1763227e495fae6bc5d011d)", + "datafusion", + "parquet", ] [[package]] @@ -800,17 +764,15 @@ dependencies = [ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ - "arrow 2.0.0-SNAPSHOT 
(git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "chrono", "croaring", "crossbeam", - "datafusion 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", + "delorean_arrow", "delorean_table", "env_logger", "heapsize", "human_format", "log", - "parquet 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", "snafu", ] @@ -2096,25 +2058,6 @@ dependencies = [ "zstd", ] -[[package]] -name = "parquet" -version = "2.0.0-SNAPSHOT" -source = "git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd#46f18c2602072e083809e0846b810e0cc3c59fdd" -dependencies = [ - "arrow 2.0.0-SNAPSHOT (git+https://github.com/alamb/arrow.git?rev=46f18c2602072e083809e0846b810e0cc3c59fdd)", - "brotli", - "byteorder", - "chrono", - "flate2", - "lz4", - "num-bigint 0.3.0", - "parquet-format", - "serde_json", - "snap", - "thrift", - "zstd", -] - [[package]] name = "parquet-format" version = "2.6.1" @@ -2877,9 +2820,9 @@ checksum = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f" [[package]] name = "snafu" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f5aed652511f5c9123cf2afbe9c244c29db6effa2abb05c866e965c82405ce" +checksum = "9c4e6046e4691afe918fd1b603fd6e515bcda5388a1092a9edbada307d159f09" dependencies = [ "doc-comment", "futures-core", @@ -2889,9 +2832,9 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebf8f7d5720104a9df0f7076a8682024e958bba0fe9848767bb44f251f3648e9" +checksum = "7073448732a89f2f3e6581989106067f403d378faeafb4a50812eb814170d3e5" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index fbc65ec72c..59c885c464 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,7 +69,7 @@ tracing = "0.1" tracing-futures="0.2.4" http = "0.2.0" -snafu = "0.6.2" +snafu = "0.6.9" libflate = "1.0.0" [dev-dependencies] diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 742df5ba19..5c0fbc3f37 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -8,10 +8,8 @@ edition = "2018" [dependencies] +delorean_arrow = { path = "../delorean_arrow" } delorean_table = { path = "../delorean_table" } -arrow = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } -parquet = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } -datafusion = { git = "https://github.com/alamb/arrow.git", rev="46f18c2602072e083809e0846b810e0cc3c59fdd", version = "2.0.0-SNAPSHOT" } heapsize = "0.4.2" snafu = "0.6.8" croaring = "0.4.5" diff --git a/delorean_mem_qe/src/adapter.rs b/delorean_mem_qe/src/adapter.rs index c2537fa53c..eeb2523a5e 100644 --- a/delorean_mem_qe/src/adapter.rs +++ b/delorean_mem_qe/src/adapter.rs @@ -1,330 +1,331 @@ //! 
Code for interfacing and running queries in DataFusion -use crate::Store; -use arrow::{ - datatypes::{Schema, SchemaRef}, - record_batch::{RecordBatch, RecordBatchReader}, - util::pretty, -}; -use datafusion::prelude::*; -use datafusion::{ - datasource::TableProvider, - execution::{ - context::ExecutionContextState, - physical_plan::{common::RecordBatchIterator, ExecutionPlan, Partition}, - }, - logicalplan::{make_logical_plan_node, Expr, LogicalPlan}, - lp::LogicalPlanNode, - optimizer::utils, -}; +// use crate::Store; +// use delorean_arrow::arrow::{ +// datatypes::{Schema, SchemaRef}, +// record_batch::{RecordBatch, RecordBatchReader}, +// util::pretty, +// }; +// use delorean_arrow::datafusion::prelude::*; +// use delorean_arrow::datafusion::{ +// datasource::TableProvider, +// execution::{ +// context::ExecutionContextState, +// physical_plan::{common::RecordBatchIterator, ExecutionPlan, Partition}, +// }, +// logicalplan::{make_logical_plan_node, Expr, LogicalPlan}, +// lp::LogicalPlanNode, +// optimizer::utils, +// }; -use crate::column; -use std::{ - fmt, - sync::{Arc, Mutex}, -}; +// use crate::column; +// use std::{ +// fmt, +// sync::{Arc, Mutex}, +// }; -/// Wrapper to adapt a Store to a DataFusion "TableProvider" -- -/// eventually we could also implement this directly on Store -pub struct StoreTableSource { - store: Arc, -} +// Wrapper to adapt a Store to a DataFusion "TableProvider" -- +// eventually we could also implement this directly on Store +// pub struct StoreTableSource { +// store: Arc, +// } -impl<'a> StoreTableSource { - pub fn new(store: Arc) -> Self { - Self { store } - } -} +// impl<'a> StoreTableSource { +// pub fn new(store: Arc) -> Self { +// Self { store } +// } +// } -impl TableProvider for StoreTableSource { - /// Get a reference to the schema for this table - fn schema(&self) -> SchemaRef { - self.store.schema() - } +// impl TableProvider for StoreTableSource { +// /// Get a reference to the schema for this table +// fn schema(&self) -> SchemaRef { +// self.store.schema() +// } - /// Perform a scan of a table and return a sequence of iterators over the data (one - /// iterator per partition) - fn scan( - &self, - _projection: &Option>, - _batch_size: usize, - ) -> datafusion::error::Result>> { - unimplemented!("scan not yet implemented"); - } -} +// /// Perform a scan of a table and return a sequence of iterators over the data (one +// /// iterator per partition) +// fn scan( +// &self, +// _projection: &Option>, +// _batch_size: usize, +// ) -> delorean_arrow::datafusion::error::Result>> { +// unimplemented!("scan not yet implemented"); +// } +// } -/// Prototype of how a Delorean query engine, built on top of -/// DataFusion, but using specialized column store operators might -/// look like. -/// -/// Data from the Segments in the `store` are visible in DataFusion -/// as a table ("measurement") in this prototype. -pub struct DeloreanQueryEngine { - ctx: ExecutionContext, - store: Arc, -} +// /// Prototype of how a Delorean query engine, built on top of +// /// DataFusion, but using specialized column store operators might +// /// look like. +// /// +// /// Data from the Segments in the `store` are visible in DataFusion +// /// as a table ("measurement") in this prototype. 
+// pub struct DeloreanQueryEngine { +// ctx: ExecutionContext, +// store: Arc, +// } -impl DeloreanQueryEngine { - pub fn new(store: Arc) -> Self { - let start = std::time::Instant::now(); - let mut ctx = ExecutionContext::new(); - let source = StoreTableSource::new(store.clone()); - let source = Box::new(source); - ctx.register_table("measurement", source); - println!("Completed setup in {:?}", start.elapsed()); - DeloreanQueryEngine { ctx, store } - } +// impl DeloreanQueryEngine { +// pub fn new(store: Arc) -> Self { +// let start = std::time::Instant::now(); +// let mut ctx = ExecutionContext::new(); +// let source = StoreTableSource::new(store.clone()); +// let source = Box::new(source); +// ctx.register_table("measurement", source); +// println!("Completed setup in {:?}", start.elapsed()); +// DeloreanQueryEngine { ctx, store } +// } - // Run the specified SQL and return the number of records matched - pub fn run_sql(&mut self, sql: &str) -> usize { - let plan = self - .ctx - .create_logical_plan(sql) - .expect("Creating the logical plan"); +// // Run the specified SQL and return the number of records matched +// pub fn run_sql(&mut self, sql: &str) -> usize { +// let plan = self +// .ctx +// .create_logical_plan(sql) +// .expect("Creating the logical plan"); - //println!("Created logical plan:\n{:?}", plan); - let plan = self.rewrite_to_segment_scan(&plan); - //println!("Rewritten logical plan:\n{:?}", plan); +// //println!("Created logical plan:\n{:?}", plan); +// let plan = self.rewrite_to_segment_scan(&plan); +// //println!("Rewritten logical plan:\n{:?}", plan); - match self.ctx.collect_plan(&plan) { - Err(err) => { - println!("Error running query: {:?}", err); - 0 - } - Ok(results) => { - if results.is_empty() { - //println!("Empty result returned"); - 0 - } else { - pretty::print_batches(&results).expect("printing"); - results.iter().map(|b| b.num_rows()).sum() - } - } - } - } +// match self.ctx.collect_plan(&plan) { +// Err(err) => { +// println!("Error running query: {:?}", err); +// 0 +// } +// Ok(results) => { +// if results.is_empty() { +// //println!("Empty result returned"); +// 0 +// } else { +// pretty::print_batches(&results).expect("printing"); +// results.iter().map(|b| b.num_rows()).sum() +// } +// } +// } +// } - /// Specialized optimizer pass that combines a `TableScan` and a `Filter` - /// together into a SegementStore with the predicates. - /// - /// For example, given this input: - /// - /// Projection: #env, #method, #host, #counter, #time - /// Filter: #time GtEq Int64(1590036110000000) - /// TableScan: measurement projection=None - /// - /// The following plan would be produced - /// Projection: #env, #method, #host, #counter, #time - /// SegmentScan: measurement projection=None predicate=: #time GtEq Int64(1590036110000000) - /// - fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan { - if let LogicalPlan::Filter { predicate, input } = plan { - // see if the input is a TableScan - if let LogicalPlan::TableScan { .. } = **input { - return make_logical_plan_node(Box::new(SegmentScan::new( - self.store.clone(), - predicate.clone(), - ))); - } - } +// /// Specialized optimizer pass that combines a `TableScan` and a `Filter` +// /// together into a SegementStore with the predicates. 
+// /// +// /// For example, given this input: +// /// +// /// Projection: #env, #method, #host, #counter, #time +// /// Filter: #time GtEq Int64(1590036110000000) +// /// TableScan: measurement projection=None +// /// +// /// The following plan would be produced +// /// Projection: #env, #method, #host, #counter, #time +// /// SegmentScan: measurement projection=None predicate=: #time GtEq Int64(1590036110000000) +// /// +// fn rewrite_to_segment_scan(&self, plan: &LogicalPlan) -> LogicalPlan { +// if let LogicalPlan::Filter { predicate, input } = plan { +// // see if the input is a TableScan +// if let LogicalPlan::TableScan { .. } = **input { +// return make_logical_plan_node(Box::new(SegmentScan::new( +// self.store.clone(), +// predicate.clone(), +// ))); +// } +// } - // otherwise recursively apply - let optimized_inputs = utils::inputs(&plan) - .iter() - .map(|input| self.rewrite_to_segment_scan(input)) - .collect(); +// // otherwise recursively apply +// let optimized_inputs = utils::inputs(&plan) +// .iter() +// .map(|input| self.rewrite_to_segment_scan(input)) +// .collect(); - return utils::from_plan(plan, &utils::expressions(plan), &optimized_inputs) - .expect("Created plan"); - } -} +// return utils::from_plan(plan, &utils::expressions(plan), &optimized_inputs) +// .expect("Created plan"); +// } +// } -/// LogicalPlan node that serves as a scan of the segment store with optional predicates -struct SegmentScan { - /// The underlying Store - store: Arc, +// /// LogicalPlan node that serves as a scan of the segment store with optional predicates +// struct SegmentScan { +// /// The underlying Store +// store: Arc, - schema: SchemaRef, +// schema: SchemaRef, - /// The predicate to apply during the scan - predicate: Expr, -} +// /// The predicate to apply during the scan +// predicate: Expr, +// } -impl<'a> SegmentScan { - fn new(store: Arc, predicate: Expr) -> Self { - let schema = store.schema().clone(); +// impl<'a> SegmentScan { +// fn new(store: Arc, predicate: Expr) -> Self { +// let schema = store.schema().clone(); - SegmentScan { - store, - schema, - predicate, - } - } -} +// SegmentScan { +// store, +// schema, +// predicate, +// } +// } +// } -impl LogicalPlanNode for SegmentScan { - /// Return a reference to the logical plan's inputs - fn inputs(&self) -> Vec<&LogicalPlan> { - Vec::new() - } +// impl LogicalPlanNode for SegmentScan { +// /// Return a reference to the logical plan's inputs +// fn inputs(&self) -> Vec<&LogicalPlan> { +// Vec::new() +// } - /// Get a reference to the logical plan's schema - fn schema(&self) -> &Schema { - self.schema.as_ref() - } +// /// Get a reference to the logical plan's schema +// fn schema(&self) -> &Schema { +// self.schema.as_ref() +// } - /// returns all expressions (non-recursively) in the current logical plan node. - fn expressions(&self) -> Vec { - // The predicate expression gets absorbed by this node As - // there are no inputs, there are no exprs that operate on - // inputs - Vec::new() - } +// /// returns all expressions (non-recursively) in the current logical plan node. 
+// fn expressions(&self) -> Vec { +// // The predicate expression gets absorbed by this node As +// // there are no inputs, there are no exprs that operate on +// // inputs +// Vec::new() +// } - /// Write a single line human readable string to `f` for use in explain plan - fn format_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "SegmentScan: {:?} predicate {:?}", - self.store.as_ref() as *const Store, - self.predicate - ) - } +// /// Write a single line human readable string to `f` for use in explain plan +// fn format_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +// write!( +// f, +// "SegmentScan: {:?} predicate {:?}", +// self.store.as_ref() as *const Store, +// self.predicate +// ) +// } - /// Create a clone of this node. - /// - /// Note std::Clone needs a Sized type, so we must implement a - /// clone that creates a node with a known Size (i.e. Box) - // - fn dyn_clone(&self) -> Box { - Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) - } +// /// Create a clone of this node. +// /// +// /// Note std::Clone needs a Sized type, so we must implement a +// /// clone that creates a node with a known Size (i.e. Box) +// // +// fn dyn_clone(&self) -> Box { +// Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) +// } - /// Create a clone of this LogicalPlanNode with inputs and expressions replaced. - /// - /// Note that exprs and inputs are in the same order as the result - /// of self.inputs and self.exprs. - /// - /// So, clone_from_template(exprs).exprs() == exprs - fn clone_from_template( - &self, - exprs: &Vec, - inputs: &Vec, - ) -> Box { - assert_eq!(exprs.len(), 0, "no exprs expected"); - assert_eq!(inputs.len(), 0, "no inputs expected"); - Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) - } +// /// Create a clone of this LogicalPlanNode with inputs and expressions replaced. +// /// +// /// Note that exprs and inputs are in the same order as the result +// /// of self.inputs and self.exprs. +// /// +// /// So, clone_from_template(exprs).exprs() == exprs +// fn clone_from_template( +// &self, +// exprs: &Vec, +// inputs: &Vec, +// ) -> Box { +// assert_eq!(exprs.len(), 0, "no exprs expected"); +// assert_eq!(inputs.len(), 0, "no inputs expected"); +// Box::new(SegmentScan::new(self.store.clone(), self.predicate.clone())) +// } - /// Create the corresponding physical scheplan for this node - fn create_physical_plan( - &self, - input_physical_plans: Vec>, - _ctx_state: Arc>, - ) -> datafusion::error::Result> { - assert_eq!(input_physical_plans.len(), 0, "Can not have inputs"); +// /// Create the corresponding physical scheplan for this node +// fn create_physical_plan( +// &self, +// input_physical_plans: Vec>, +// _ctx_state: Arc>, +// ) -> delorean_arrow::datafusion::error::Result> { +// assert_eq!(input_physical_plans.len(), 0, "Can not have inputs"); - // If this were real code, we would now progrmatically - // transform the DataFusion Expr into the specific form needed - // by the Segment. However, to save prototype time we just - // hard code it here instead - assert_eq!( - format!("{:?}", self.predicate), - "CAST(#time AS Int64) GtEq Int64(1590036110000000) And CAST(#time AS Int64) Lt Int64(1590040770000000) And #env Eq Utf8(\"prod01-eu-central-1\")" - ); +// // If this were real code, we would now progrmatically +// // transform the DataFusion Expr into the specific form needed +// // by the Segment. 
However, to save prototype time we just +// // hard code it here instead +// assert_eq!( +// format!("{:?}", self.predicate), +// "CAST(#time AS Int64) GtEq Int64(1590036110000000) And CAST(#time AS Int64) Lt Int64(1590040770000000) And #env Eq Utf8(\"prod01-eu-central-1\")" +// ); - let time_range = (1590036110000000, 1590040770000000); - let string_predicate = StringPredicate { - col_name: "env".into(), - value: "prod01-eu-central-1".into(), - }; +// let time_range = (1590036110000000, 1590040770000000); +// let string_predicate = StringPredicate { +// col_name: "env".into(), +// value: "prod01-eu-central-1".into(), +// }; - Ok(Arc::new(SegmentScanExec::new( - self.store.clone(), - time_range, - string_predicate, - ))) - } -} +// Ok(Arc::new(SegmentScanExec::new( +// self.store.clone(), +// time_range, +// string_predicate, +// ))) +// } +// } -#[derive(Debug, Clone)] -struct StringPredicate { - col_name: String, - value: String, -} +// #[derive(Debug, Clone)] +// struct StringPredicate { +// col_name: String, +// value: String, +// } -/// StoreScan execution node -#[derive(Debug)] -pub struct SegmentScanExec { - store: Arc, +// /// StoreScan execution node +// #[derive(Debug)] +// pub struct SegmentScanExec { +// store: Arc, - // Specialized predicates to apply - time_range: (i64, i64), - string_predicate: StringPredicate, -} +// // Specialized predicates to apply +// time_range: (i64, i64), +// string_predicate: StringPredicate, +// } -impl SegmentScanExec { - fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { - SegmentScanExec { - store, - time_range, - string_predicate, - } - } -} +// impl SegmentScanExec { +// fn new(store: Arc, time_range: (i64, i64), string_predicate: StringPredicate) -> Self { +// SegmentScanExec { +// store, +// time_range, +// string_predicate, +// } +// } +// } -impl ExecutionPlan for SegmentScanExec { - fn schema(&self) -> SchemaRef { - self.store.schema() - } +// impl ExecutionPlan for SegmentScanExec { +// fn schema(&self) -> SchemaRef { +// self.store.schema() +// } - fn partitions(&self) -> datafusion::error::Result>> { - let store = self.store.clone(); - Ok(vec![Arc::new(SegmentPartition { - store, - time_range: self.time_range, - string_predicate: self.string_predicate.clone(), - })]) - } -} +// fn partitions(&self) -> delorean_arrow::datafusion::error::Result>> { +// let store = self.store.clone(); +// Ok(vec![Arc::new(SegmentPartition { +// store, +// time_range: self.time_range, +// string_predicate: self.string_predicate.clone(), +// })]) +// } +// } -#[derive(Debug)] -struct SegmentPartition { - store: Arc, - time_range: (i64, i64), - string_predicate: StringPredicate, -} +// #[derive(Debug)] +// struct SegmentPartition { +// store: Arc, +// time_range: (i64, i64), +// string_predicate: StringPredicate, +// } -impl Partition for SegmentPartition { - fn execute( - &self, - ) -> datafusion::error::Result>> { - let combined_results: Vec> = vec![]; +// impl Partition for SegmentPartition { +// fn execute( +// &self, +// ) -> delorean_arrow::datafusion::error::Result>> +// { +// let combined_results: Vec> = vec![]; - let segments = self.store.segments(); +// let segments = self.store.segments(); - // prepare the string predicates in the manner Segments want them - let col_name = &self.string_predicate.col_name; - let scalar = column::Scalar::String(&self.string_predicate.value); +// // prepare the string predicates in the manner Segments want them +// let col_name = &self.string_predicate.col_name; +// let scalar = 
column::Scalar::String(&self.string_predicate.value); - // Here - let _columns = segments.read_filter_eq( - self.time_range, - &[(col_name, Some(scalar))], - vec![ - "env".to_string(), - "method".to_string(), - "host".to_string(), - "counter".to_string(), - "time".to_string(), - ], - ); +// // Here +// let _columns = segments.read_filter_eq( +// self.time_range, +// &[(col_name, Some(scalar))], +// vec![ +// "env".to_string(), +// "method".to_string(), +// "host".to_string(), +// "counter".to_string(), +// "time".to_string(), +// ], +// ); - // If we were implementing this for real, we would not convert - // `columns` into RecordBatches and feed them back out +// // If we were implementing this for real, we would not convert +// // `columns` into RecordBatches and feed them back out - Ok(Arc::new(Mutex::new(RecordBatchIterator::new( - self.store.schema().clone(), - combined_results, - )))) - } -} +// Ok(Arc::new(Mutex::new(RecordBatchIterator::new( +// self.store.schema().clone(), +// combined_results, +// )))) +// } +// } diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 659e1028e9..cda3563189 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -8,30 +8,21 @@ use std::{ sync::Arc, }; -use arrow::record_batch::{RecordBatch, RecordBatchReader}; -use arrow::{array, array::Array, datatypes, ipc}; - -use delorean_mem_qe::column; -use delorean_mem_qe::column::{AggregateType, Column}; -use delorean_mem_qe::segment::{ColumnType, GroupingStrategy, Schema, Segment}; -use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; -use parquet::arrow::arrow_reader::ArrowReader; - -// use snafu::ensure; use datatypes::TimeUnit; use snafu::Snafu; +use delorean_arrow::arrow::array::StringArrayOps; +use delorean_arrow::arrow::record_batch::{RecordBatch, RecordBatchReader}; +use delorean_arrow::arrow::{array, array::Array, datatypes, ipc}; +use delorean_arrow::parquet::arrow::arrow_reader::ArrowReader; +use delorean_mem_qe::column; +use delorean_mem_qe::column::{AggregateType, Column}; +use delorean_mem_qe::segment::{ColumnType, GroupingStrategy, Schema, Segment}; +use delorean_mem_qe::Store; +// use delorean_mem_qe::{adapter::DeloreanQueryEngine, Store}; + #[derive(Snafu, Debug, Clone, Copy, PartialEq)] -pub enum Error { - // #[snafu(display(r#"Too many sort columns specified"#))] -// TooManyColumns, - -// #[snafu(display(r#"Same column specified as sort column multiple times"#))] -// RepeatedColumns { index: usize }, - -// #[snafu(display(r#"Specified column index is out bounds"#))] -// OutOfBoundsColumn { index: usize }, -} +pub enum Error {} fn format_size(sz: usize) -> String { human_format::Formatter::new().format(sz as f64) @@ -88,9 +79,11 @@ fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> path ); - let parquet_reader = parquet::file::reader::SerializedFileReader::new(r).unwrap(); - let mut reader = - parquet::arrow::arrow_reader::ParquetFileArrowReader::new(Rc::new(parquet_reader)); + let parquet_reader = + delorean_arrow::parquet::file::reader::SerializedFileReader::new(r).unwrap(); + let mut reader = delorean_arrow::parquet::arrow::arrow_reader::ParquetFileArrowReader::new( + Rc::new(parquet_reader), + ); let batch_size = 60000; let record_batch_reader = reader.get_record_reader(batch_size).unwrap(); build_store(record_batch_reader, store, sort_order) @@ -419,32 +412,32 @@ fn time_select_with_pred(store: &Store) { // // Use the hard coded timestamp values 1590036110000000, 1590040770000000 -fn 
time_datafusion_select_with_pred(store: Arc) { - let mut query_engine = DeloreanQueryEngine::new(store); +// fn time_datafusion_select_with_pred(store: Arc) { +// let mut query_engine = DeloreanQueryEngine::new(store); - let sql_string = r#"SELECT env, method, host, counter, time - FROM measurement - WHERE time::BIGINT >= 1590036110000000 - AND time::BIGINT < 1590040770000000 - AND env = 'prod01-eu-central-1' - "#; +// let sql_string = r#"SELECT env, method, host, counter, time +// FROM measurement +// WHERE time::BIGINT >= 1590036110000000 +// AND time::BIGINT < 1590040770000000 +// AND env = 'prod01-eu-central-1' +// "#; - let repeat = 100; - let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut track = 0; - for _ in 0..repeat { - let now = std::time::Instant::now(); - track += query_engine.run_sql(&sql_string); - total_time += now.elapsed(); - } - println!( - "time_datafusion_select_with_pred ran {:?} in {:?} {:?} / call {:?}", - repeat, - total_time, - total_time / repeat, - track - ); -} +// let repeat = 100; +// let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); +// let mut track = 0; +// for _ in 0..repeat { +// let now = std::time::Instant::now(); +// track += query_engine.run_sql(&sql_string); +// total_time += now.elapsed(); +// } +// println!( +// "time_datafusion_select_with_pred ran {:?} in {:?} {:?} / call {:?}", +// repeat, +// total_time, +// total_time / repeat, +// track +// ); +// } // // SELECT env, method, host, counter, time diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index ee63e51201..d2686f7e5d 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -1,6 +1,8 @@ use std::collections::BTreeSet; use std::convert::From; +use delorean_arrow::arrow; + use super::encoding; #[derive(Debug)] @@ -1331,21 +1333,20 @@ impl AggregatableByRange for &Column { } } -use arrow::array::{Float64Array, Int64Array, TimestampMicrosecondArray}; -impl From for Column { +impl From for Column { fn from(arr: arrow::array::Float64Array) -> Self { Self::Float(NumericColumn::from(arr)) } } -impl From for Column { - fn from(arr: TimestampMicrosecondArray) -> Self { +impl From for Column { + fn from(arr: arrow::array::TimestampMicrosecondArray) -> Self { Self::Integer(NumericColumn::from(arr)) } } -impl From for Column { - fn from(arr: Int64Array) -> Self { +impl From for Column { + fn from(arr: arrow::array::Int64Array) -> Self { Self::Integer(NumericColumn::from(arr)) } } @@ -1682,7 +1683,9 @@ where } } -use arrow::array::Array; +use delorean_arrow::arrow::array::Array; +use delorean_arrow::arrow::array::PrimitiveArrayOps; + impl From for NumericColumn { fn from(arr: arrow::array::Float64Array) -> Self { let len = arr.len(); diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 09467bea63..3b33ed5e8e 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -2,8 +2,9 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::iter; use std::mem::size_of; -use arrow::array::{Array, PrimitiveArray}; -use arrow::datatypes::ArrowNumericType; +use delorean_arrow::arrow::array::PrimitiveArrayOps; +use delorean_arrow::arrow::array::{Array, PrimitiveArray}; +use delorean_arrow::arrow::datatypes::ArrowNumericType; pub trait NumericEncoding: Send + Sync + std::fmt::Display + std::fmt::Debug { type Item; diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index 29bb987eac..ad32402145 100644 --- 
a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -5,7 +5,7 @@ pub mod encoding; pub mod segment; pub mod sorter; -use arrow::datatypes::SchemaRef; +use delorean_arrow::arrow::datatypes::SchemaRef; use segment::{Segment, Segments}; #[derive(Debug, Default)] diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index fdde84f430..f42a90e31d 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; use super::column; use super::column::{AggregateType, Column}; -use arrow::datatypes::SchemaRef; +use delorean_arrow::arrow::datatypes::SchemaRef; // Only used in a couple of specific places for experimentation. const THREADS: usize = 16; @@ -1812,12 +1812,12 @@ pub struct GroupedAggregates<'a> { #[cfg(test)] mod test { - use arrow::datatypes::*; + use delorean_arrow::arrow::datatypes::*; #[test] fn segment_group_key_sorted() { let schema = super::Schema::with_sort_order( - arrow::datatypes::SchemaRef::new(Schema::new(vec![ + delorean_arrow::arrow::datatypes::SchemaRef::new(Schema::new(vec![ Field::new("env", DataType::Utf8, false), Field::new("role", DataType::Utf8, false), Field::new("path", DataType::Utf8, false), From 9eee0c2852ccf81cfedc4fa3bc65a8b12042b7ba Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 24 Sep 2020 17:11:14 +0100 Subject: [PATCH 66/73] refactor: make clippy happy --- Cargo.lock | 1 + delorean_ingest/src/lib.rs | 7 ++++--- delorean_mem_qe/Cargo.toml | 1 + delorean_mem_qe/benches/encoding.rs | 10 ++++------ delorean_mem_qe/src/bin/main.rs | 30 +++++++++++++---------------- delorean_mem_qe/src/column.rs | 8 ++++---- delorean_mem_qe/src/encoding.rs | 19 +++++++++--------- delorean_mem_qe/src/lib.rs | 1 + delorean_mem_qe/src/segment.rs | 12 +++++------- delorean_table/src/sorter.rs | 12 ++++-------- 10 files changed, 46 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6f96b0298..8cd8705170 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -765,6 +765,7 @@ name = "delorean_mem_qe" version = "0.1.0" dependencies = [ "chrono", + "criterion", "croaring", "crossbeam", "delorean_arrow", diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 9a0223e8b7..45a9374d9e 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -813,9 +813,9 @@ impl TSMFileConverter { println!("verifying order"); let values = packed_columns[12].i64_packer_mut().values(); let mut last = values[0]; - for i in 1..values.len() { - assert!(values[i] >= last); - last = values[i]; + for &v in values.iter().skip(1) { + assert!(v >= last); + last = v; } println!("finished sort in {:?}", now.elapsed()); @@ -823,6 +823,7 @@ impl TSMFileConverter { write_arrow_file(schema, packed_columns).unwrap(); println!("Done!"); + let _ = self.table_writer_source; // if packed_columns.len() < 13 { // continue; // } diff --git a/delorean_mem_qe/Cargo.toml b/delorean_mem_qe/Cargo.toml index 5c0fbc3f37..c25677fbe5 100644 --- a/delorean_mem_qe/Cargo.toml +++ b/delorean_mem_qe/Cargo.toml @@ -22,3 +22,4 @@ human_format = "1.0.3" [dev-dependencies] +criterion = "0.3" \ No newline at end of file diff --git a/delorean_mem_qe/benches/encoding.rs b/delorean_mem_qe/benches/encoding.rs index 504ce64c8d..34ad9108ee 100644 --- a/delorean_mem_qe/benches/encoding.rs +++ b/delorean_mem_qe/benches/encoding.rs @@ -24,7 +24,7 @@ fn benchmark_row_ids( let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); let values = batch_size / cardinality; for i in 
0..cardinality { - input.push_additional(i.to_string().as_str(), values as u64); + input.push_additional(Some(i.to_string()), values as u64); } group.throughput(Throughput::Bytes(batch_size as u64)); @@ -35,9 +35,7 @@ fn benchmark_row_ids( b.iter(|| { // do work for i in 0..cardinality { - let ids = input - .row_ids(i.to_string().as_str()) - .collect::>(); + let _ = input.row_ids(Some(i.to_string())).collect::>(); } }); }, @@ -68,7 +66,7 @@ fn benchmark_row_ids_roaring( let mut input = delorean_mem_qe::encoding::DictionaryRLE::new(); let values = batch_size / cardinality; for i in 0..cardinality { - input.push_additional(i.to_string().as_str(), values as u64); + input.push_additional(Some(i.to_string()), values as u64); } group.throughput(Throughput::Bytes(batch_size as u64)); @@ -79,7 +77,7 @@ fn benchmark_row_ids_roaring( b.iter(|| { // do work for i in 0..cardinality { - let ids = input.row_ids_roaring(i.to_string().as_str()); + let _ = input.row_ids_eq_roaring(Some(i.to_string())); } }); }, diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index cda3563189..7b5e26c42b 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -54,18 +54,18 @@ fn main() { ); let store = Arc::new(store); - // time_select_with_pred(&store); + time_select_with_pred(&store); // time_datafusion_select_with_pred(store.clone()); - // time_first_host(&store); - // time_sum_range(&store); - // time_count_range(&store); - // time_group_single_with_pred(&store); - // time_group_by_multi_agg_count(&store); - // time_group_by_multi_agg_sorted_count(&store); - // time_window_agg_count(&store); - // time_tag_keys_with_pred(&store); + time_first_host(&store); + time_sum_range(&store); + time_count_range(&store); + time_group_single_with_pred(&store); + time_group_by_multi_agg_count(&store); + time_group_by_multi_agg_sorted_count(&store); + time_window_agg_count(&store); + time_tag_keys_with_pred(&store); time_tag_values_with_pred(&store); - // time_group_by_different_columns(&store); + time_group_by_different_columns(&store); } fn build_parquet_store(path: &str, store: &mut Store, sort_order: Vec<&str>) -> Result<(), Error> { @@ -110,7 +110,6 @@ fn build_store( ) -> Result<(), Error> { let mut total_rows_read = 0; let start = std::time::Instant::now(); - let mut i = 0; loop { let rb = reader.next_batch(); match rb { @@ -269,7 +268,7 @@ fn convert_record_batch(rb: RecordBatch, segment: &mut Segment) -> Result<(), Er datatypes::DataType::Boolean => { panic!("unsupported"); } - ref d @ _ => panic!("unsupported datatype: {:?}", d), + _ => panic!("unsupported datatype"), } } Ok(()) @@ -458,7 +457,7 @@ fn time_group_single_with_pred(store: &Store) { (1588834080000000, 1590044410000000), &[], &"env".to_string(), - &vec![("counter".to_string(), AggregateType::Count)], + &[("counter".to_string(), AggregateType::Count)], ); track += results.len(); } @@ -689,14 +688,12 @@ fn time_group_by_different_columns(store: &Store) { for strat in &strats { let repeat = 10; let mut total_time: std::time::Duration = std::time::Duration::new(0, 0); - let mut total_max = 0; let segments = store.segments(); - for i in 1..=cols.len() { for _ in 0..repeat { let now = std::time::Instant::now(); - let groups = segments.read_group_eq( + segments.read_group_eq( (1589000000000001, 1590044410000000), &[], cols[0..i].to_vec(), @@ -706,7 +703,6 @@ fn time_group_by_different_columns(store: &Store) { ); total_time += now.elapsed(); - total_max += groups.len(); } println!( 
"time_group_by_different_columns{:?} cols: {:?} ran {:?} in {:?} {:?}", diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index d2686f7e5d..d03eb0c6e1 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -981,7 +981,7 @@ impl Column { Column::String(c) => { if let Scalar::String(v) = value { if let Some(range) = c.meta.range() { - range.1 < v.to_string() + &range.1.as_str() < v } else { false } @@ -1020,7 +1020,7 @@ impl Column { Column::String(c) => { if let Scalar::String(v) = value { if let Some(range) = c.meta.range() { - range.0 > v.to_string() + &range.0.as_str() > v } else { false } @@ -1905,7 +1905,7 @@ pub mod metadata { pub fn size(&self) -> usize { // size of types for num_rows and range - let base_size = size_of::() + (2 * size_of::>()); + size_of::() + (2 * size_of::>()) // // TODO: figure out a way to specify that T must be able to describe its runtime size. @@ -1916,7 +1916,7 @@ pub mod metadata { // (None, Some(max)) => base_size + max.len(), // (Some(min), Some(max)) => base_size + min.len() + max.len(), // } - base_size + // base_size } } diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index 3b33ed5e8e..a3c0e2b6a6 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -107,7 +107,7 @@ where /// supports null values then the values returned are undefined. /// /// encoded_values should not be called on nullable columns. - fn encoded_values(&self, row_ids: &[usize]) -> Vec { + fn encoded_values(&self, _: &[usize]) -> Vec { todo!(); } @@ -117,7 +117,7 @@ where // TODO(edd): problem here is returning a slice because we need to own the // backing vector. - fn scan_from(&self, row_id: usize) -> &[Option] { + fn scan_from(&self, _: usize) -> &[Option] { unimplemented!("need to figure out returning a slice"); // let mut out = Vec::with_capacity(self.arr.len() - row_id); // for i in row_id..self.arr.len() { @@ -185,7 +185,7 @@ where count // if there are no non-null rows the result is 0 rather than NULL } - fn count_by_ids(&self, row_ids: &croaring::Bitmap) -> u64 { + fn count_by_ids(&self, _: &croaring::Bitmap) -> u64 { todo!() } @@ -213,8 +213,8 @@ where fn row_ids_single_cmp_roaring( &self, - wanted: &Self::Item, - order: std::cmp::Ordering, + _: &Self::Item, + _: std::cmp::Ordering, ) -> croaring::Bitmap { todo!() } @@ -224,7 +224,6 @@ where let mut found = false; //self.values[0]; let mut count = 0; - let mut i = 0; for i in 0..self.arr.len() { let next = &self.arr.value(i); if (self.arr.is_null(i) || next < from || next >= to) && found { @@ -385,7 +384,7 @@ where self.values.clone() // TODO(edd):perf probably can return reference to vec. } - fn scan_from(&self, row_id: usize) -> &[Option] { + fn scan_from(&self, _: usize) -> &[Option] { unimplemented!("this should probably take a destination vector or maybe a closure"); // &self.values[row_id..] 
} @@ -1311,9 +1310,9 @@ mod test { let east = Some("east".to_string()); let north = Some("north".to_string()); drle.push_additional(west.clone(), 3); - drle.push_additional(east.clone(), 2); - drle.push_additional(north.clone(), 4); - drle.push_additional(west.clone(), 3); + drle.push_additional(east, 2); + drle.push_additional(north, 4); + drle.push_additional(west, 3); let results = drle.encoded_values(&[0, 1, 4, 5]); diff --git a/delorean_mem_qe/src/lib.rs b/delorean_mem_qe/src/lib.rs index ad32402145..b625096f1f 100644 --- a/delorean_mem_qe/src/lib.rs +++ b/delorean_mem_qe/src/lib.rs @@ -1,4 +1,5 @@ #![deny(rust_2018_idioms)] +#![allow(clippy::type_complexity)] pub mod adapter; pub mod column; pub mod encoding; diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index f42a90e31d..66a6299a74 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -131,8 +131,8 @@ impl Segment { } /// Determines if the segment contains a column with the provided name. - pub fn has_column(&self, name: &String) -> bool { - self.meta.column_names.contains(name) + pub fn has_column(&self, name: &str) -> bool { + self.meta.column_names.contains(&name.to_string()) } /// column returns the column with name @@ -445,9 +445,7 @@ impl Segment { // // TODO(edd): this is probably a bit of a perf suck. for (col_name, row_value) in &aggregate_row { - for &mut (cum_col_name, agg_type, ref mut cum_agg_value) in - group_key_entry.iter_mut() - { + for &mut (cum_col_name, _, ref mut cum_agg_value) in group_key_entry.iter_mut() { if col_name != cum_col_name { continue; } @@ -995,8 +993,8 @@ impl Segment { &self, time_range: (i64, i64), predicates: &[(&str, Option>)], - group_column: &String, - aggregates: &Vec<(String, column::AggregateType)>, + group_column: &str, + aggregates: &[(String, column::AggregateType)], ) -> BTreeMap)>> { let mut grouped_results = BTreeMap::new(); diff --git a/delorean_table/src/sorter.rs b/delorean_table/src/sorter.rs index 7911b1b310..f242913db2 100644 --- a/delorean_table/src/sorter.rs +++ b/delorean_table/src/sorter.rs @@ -186,14 +186,10 @@ fn packers_sorted_asc(packers: &[Packers], len: usize, sort_by: &[usize]) -> boo } Packers::Integer(p) => { let vec = p.values(); - if vec[i - 1] < vec[i] { - continue 'row_wise; - } else if vec[i - 1] == vec[i] { - // try next column - continue; - } else { - // value is > so - return false; + match vec[i - 1].cmp(&vec[i]) { + Ordering::Less => continue 'row_wise, + Ordering::Equal => continue, + Ordering::Greater => return false, } } _ => continue, // don't compare on non-string / timestamp cols From 10511ae08b3ad87829b037d7d2716893e211db3b Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 10:03:56 +0100 Subject: [PATCH 67/73] refactor: restore tsm ingest --- delorean_ingest/src/lib.rs | 132 +++---------------------------------- 1 file changed, 9 insertions(+), 123 deletions(-) diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 45a9374d9e..16bf2d7773 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -719,132 +719,18 @@ impl TSMFileConverter { match next_measurement { Some(mut table) => { - if table.name != "http_api_requests_total" { - continue; - } // convert (potentially merged) measurement.. 
- let (schema, mut packed_columns) = + let (schema, packed_columns) = Self::process_measurement_table(&mut block_reader, &mut table)?; + let mut table_writer = self + .table_writer_source + .next_writer(&schema) + .context(WriterCreation)?; - // println!("col def {:?}", schema.get_col_defs()); - // // cardinality - // for (i, col) in packed_columns.iter().enumerate() { - // println!("processing column {:?}", i); - // if let Packers::String(p) = col { - // let mut set: std::collections::BTreeSet<_> = BTreeSet::new(); - // for v in p.iter() { - // if let Some(v) = v { - // set.insert(String::from(v.as_utf8().unwrap())); - // } - // } - // println!("Cardinality for col is {:?}", set.len()); - // } - // } - // col def [ColumnDefinition { name: "env", index: 0, data_type: String }, - // ColumnDefinition { name: "handler", index: 1, data_type: String }, - // ColumnDefinition { name: "host", index: 2, data_type: String }, - // ColumnDefinition { name: "hostname", index: 3, data_type: String }, - // ColumnDefinition { name: "method", index: 4, data_type: String }, - // ColumnDefinition { name: "nodename", index: 5, data_type: String }, - // ColumnDefinition { name: "path", index: 6, data_type: String }, - // ColumnDefinition { name: "role", index: 7, data_type: String }, - // ColumnDefinition { name: "status", index: 8, data_type: String }, - // ColumnDefinition { name: "url", index: 9, data_type: String }, - // ColumnDefinition { name: "user_agent", index: 10, data_type: String }, - // ColumnDefinition { name: "counter", index: 11, data_type: Float }, - // ColumnDefinition { name: "time", index: 12, data_type: Timestamp }] - // processing column 0 - // Cardinality for col is 8 - // processing column 1 - // Cardinality for col is 8 - // processing column 2 - // Cardinality for col is 3005 - // processing column 3 - // Cardinality for col is 3005 - // processing column 4 - // Cardinality for col is 6 - // processing column 5 - // Cardinality for col is 148 - // processing column 6 - // Cardinality for col is 78 - // processing column 7 - // Cardinality for col is 14 - // processing column 8 - // Cardinality for col is 4 - // processing column 9 - // Cardinality for col is 6 - // processing column 10 - // Cardinality for col is 71 - // processing column 11 - // processing column 12 - // got all card - // println!("got all card"); - - // sort low to high == - // - // status 8 (4) - // method 4 (6) - // url 9 (6) - // env 0 (8) - // handler 1 (8) - // role 7 (14) - // user_agent 10 (71) - // path 6 (78) - // nodename 5 (148) - // host 2 (3005) - // hostname 3 (3005) - // - // time 12 - - if packed_columns.len() < 13 { - continue; - } - - println!("length of column s is {:?}", packed_columns.len()); - // let sort = [0, 7, 6, 12]; - // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; - // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; - let sort = [12]; - println!("Starting sort with {:?}", sort); - let now = std::time::Instant::now(); - - delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); - - println!("verifying order"); - let values = packed_columns[12].i64_packer_mut().values(); - let mut last = values[0]; - for &v in values.iter().skip(1) { - assert!(v >= last); - last = v; - } - println!("finished sort in {:?}", now.elapsed()); - - println!("Writing to arrow file!"); - write_arrow_file(schema, packed_columns).unwrap(); - println!("Done!"); - - let _ = self.table_writer_source; - // if packed_columns.len() < 13 { - // continue; - // } - // println!("length of column s is {:?}", 
packed_columns.len()); - // // let sort = [0, 7, 6, 12]; - // // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; - // let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; - // println!("Starting sort with {:?}", sort); - // let now = std::time::Instant::now(); - // delorean_table::sorter::sort(&mut packed_columns, &sort).unwrap(); - // println!("finished sort in {:?}", now.elapsed()); - - // let mut table_writer = self - // .table_writer_source - // .next_writer(&schema) - // .context(WriterCreation)?; - - // table_writer - // .write_batch(&packed_columns) - // .context(WriterCreation)?; - // table_writer.close().context(WriterCreation)?; + table_writer + .write_batch(&packed_columns) + .context(WriterCreation)?; + table_writer.close().context(WriterCreation)?; } None => break, } From d8fd1db0948b03740992a073319ee41544787d88 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 10:05:14 +0100 Subject: [PATCH 68/73] test: fix test --- delorean_mem_qe/src/encoding.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/delorean_mem_qe/src/encoding.rs b/delorean_mem_qe/src/encoding.rs index a3c0e2b6a6..26bc940d80 100644 --- a/delorean_mem_qe/src/encoding.rs +++ b/delorean_mem_qe/src/encoding.rs @@ -1055,10 +1055,7 @@ mod test { arr: super::PrimitiveArray::from(vec![Some(2.3), Some(44.56), None]), }; - // let encoded = col.all(); - // assert_eq!(encoded, vec![Some(2.3), Some(44.56), None]); - - let sum = col.sum_by_id_range(0, 1); + let sum = col.sum_by_id_range(0, 2); assert_eq!(sum, Some(46.86)); } From 6fcf0fd656bbe93744e27b4e0a82a8b79b221909 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 10:12:30 +0100 Subject: [PATCH 69/73] refactor: fix clippy --- delorean_ingest/src/lib.rs | 264 ++++++++++++++++++------------------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/delorean_ingest/src/lib.rs b/delorean_ingest/src/lib.rs index 16bf2d7773..ae680e5da1 100644 --- a/delorean_ingest/src/lib.rs +++ b/delorean_ingest/src/lib.rs @@ -11,7 +11,7 @@ use delorean_line_parser::{FieldValue, ParsedLine}; use delorean_table::{ - packers::{Packer, PackerChunker, Packers}, + packers::{Packer, Packers}, ByteArray, DeloreanTableWriter, DeloreanTableWriterSource, Error as TableError, }; use delorean_table_schema::{DataType, Schema, SchemaBuilder}; @@ -508,153 +508,153 @@ fn pack_lines<'a>(schema: &Schema, lines: &[ParsedLine<'a>]) -> Vec { packers } -use arrow::array; -use arrow::datatypes; -use arrow::ipc::writer; -use arrow::record_batch; -use std::fs::File; -use std::sync::Arc; +// use arrow::array; +// use arrow::datatypes; +// use arrow::ipc::writer; +// use arrow::record_batch; +// use std::fs::File; +// use std::sync::Arc; -fn arrow_datatype(datatype: DataType) -> datatypes::DataType { - match datatype { - DataType::Float => datatypes::DataType::Float64, - DataType::Integer => datatypes::DataType::Int64, - DataType::String => datatypes::DataType::Utf8, - // DataType::String => datatypes::DataType::Dictionary( - // std::boxed::Box::new(datatypes::DataType::Int16), - // std::boxed::Box::new(datatypes::DataType::Utf8), - // ), - DataType::Boolean => datatypes::DataType::Boolean, - DataType::Timestamp => datatypes::DataType::Int64, - } -} +// fn arrow_datatype(datatype: DataType) -> datatypes::DataType { +// match datatype { +// DataType::Float => datatypes::DataType::Float64, +// DataType::Integer => datatypes::DataType::Int64, +// DataType::String => datatypes::DataType::Utf8, +// // DataType::String => datatypes::DataType::Dictionary( 
+// // std::boxed::Box::new(datatypes::DataType::Int16), +// // std::boxed::Box::new(datatypes::DataType::Utf8), +// // ), +// DataType::Boolean => datatypes::DataType::Boolean, +// DataType::Timestamp => datatypes::DataType::Int64, +// } +// } -fn write_arrow_file(parquet_schema: Schema, packers: Vec) -> Result<(), Error> { - let file = File::create("/tmp/http_api_requests_total.arrow").unwrap(); +// fn write_arrow_file(parquet_schema: Schema, packers: Vec) -> Result<(), Error> { +// let file = File::create("/tmp/http_api_requests_total.arrow").unwrap(); - let mut record_batch_fields: Vec = vec![]; - // no default() on Field... - record_batch_fields.resize( - parquet_schema.get_col_defs().len(), - datatypes::Field::new("foo", datatypes::DataType::Int64, false), - ); +// let mut record_batch_fields: Vec = vec![]; +// // no default() on Field... +// record_batch_fields.resize( +// parquet_schema.get_col_defs().len(), +// datatypes::Field::new("foo", datatypes::DataType::Int64, false), +// ); - for col_def in parquet_schema.get_col_defs() { - let nullable = col_def.data_type != DataType::Timestamp; - // if col_def.data_type == DataType::Timestamp { - // nullable = false; - // } else { - // nullable = true; - // } +// for col_def in parquet_schema.get_col_defs() { +// let nullable = col_def.data_type != DataType::Timestamp; +// // if col_def.data_type == DataType::Timestamp { +// // nullable = false; +// // } else { +// // nullable = true; +// // } - record_batch_fields[col_def.index as usize] = datatypes::Field::new( - col_def.name.as_str(), - arrow_datatype(col_def.data_type), - nullable, - ); - } - println!("{:?}", record_batch_fields); - println!("{:?}", parquet_schema.get_col_defs()); - let schema = datatypes::Schema::new(record_batch_fields); +// record_batch_fields[col_def.index as usize] = datatypes::Field::new( +// col_def.name.as_str(), +// arrow_datatype(col_def.data_type), +// nullable, +// ); +// } +// println!("{:?}", record_batch_fields); +// println!("{:?}", parquet_schema.get_col_defs()); +// let schema = datatypes::Schema::new(record_batch_fields); - let mut writer = writer::StreamWriter::try_new(file, &schema).unwrap(); +// let mut writer = writer::StreamWriter::try_new(file, &schema).unwrap(); - // let num_rows = packers[0].num_rows(); - let batch_size = 60_000; +// // let num_rows = packers[0].num_rows(); +// let batch_size = 60_000; - let mut packer_chunkers: Vec> = vec![]; - for packer in &packers { - packer_chunkers.push(packer.chunk_values(batch_size)); - } +// let mut packer_chunkers: Vec> = vec![]; +// for packer in &packers { +// packer_chunkers.push(packer.chunk_values(batch_size)); +// } - loop { - let mut chunked_packers: Vec = Vec::with_capacity(packers.len()); - for chunker in &mut packer_chunkers { - match chunker { - PackerChunker::Float(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::Float(Packer::from(chunk))); - } - } - PackerChunker::Integer(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::Integer(Packer::from(chunk))); - } - } - PackerChunker::String(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::String(Packer::from(chunk))); - } - } - PackerChunker::Boolean(c) => { - if let Some(chunk) = c.next() { - chunked_packers.push(Packers::Boolean(Packer::from(chunk))); - } - } - } - } +// loop { +// let mut chunked_packers: Vec = Vec::with_capacity(packers.len()); +// for chunker in &mut packer_chunkers { +// match chunker { +// PackerChunker::Float(c) => { +// if let Some(chunk) = 
c.next() { +// chunked_packers.push(Packers::Float(Packer::from(chunk))); +// } +// } +// PackerChunker::Integer(c) => { +// if let Some(chunk) = c.next() { +// chunked_packers.push(Packers::Integer(Packer::from(chunk))); +// } +// } +// PackerChunker::String(c) => { +// if let Some(chunk) = c.next() { +// chunked_packers.push(Packers::String(Packer::from(chunk))); +// } +// } +// PackerChunker::Boolean(c) => { +// if let Some(chunk) = c.next() { +// chunked_packers.push(Packers::Boolean(Packer::from(chunk))); +// } +// } +// } +// } - if chunked_packers.is_empty() { - break; - } +// if chunked_packers.is_empty() { +// break; +// } - // let sort = [0, 7, 6, 12]; - // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; - let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; - delorean_table::sorter::sort(&mut chunked_packers, &sort).unwrap(); +// // let sort = [0, 7, 6, 12]; +// // let sort = [8, 4, 9, 0, 1, 7, 10, 6, 5, 2, 3, 12]; +// let sort = [3, 2, 5, 6, 10, 7, 1, 0, 9, 4, 8, 12]; +// delorean_table::sorter::sort(&mut chunked_packers, &sort).unwrap(); - println!( - "Writing {:?} packers with size: {:?}", - chunked_packers.len(), - chunked_packers[0].num_rows() - ); - write_arrow_batch(&mut writer, Arc::new(schema.clone()), chunked_packers); - } +// println!( +// "Writing {:?} packers with size: {:?}", +// chunked_packers.len(), +// chunked_packers[0].num_rows() +// ); +// write_arrow_batch(&mut writer, Arc::new(schema.clone()), chunked_packers); +// } - writer.finish().unwrap(); - Ok(()) -} +// writer.finish().unwrap(); +// Ok(()) +// } -fn write_arrow_batch( - w: &mut writer::StreamWriter, - schema: Arc, - packers: Vec, -) { - let mut record_batch_arrays: Vec = vec![]; +// fn write_arrow_batch( +// w: &mut writer::StreamWriter, +// schema: Arc, +// packers: Vec, +// ) { +// let mut record_batch_arrays: Vec = vec![]; - for packer in packers { - match packer { - Packers::Float(p) => { - record_batch_arrays.push(Arc::new(array::Float64Array::from(p.values().to_vec()))); - } - Packers::Integer(p) => { - record_batch_arrays.push(Arc::new(array::Int64Array::from(p.values().to_vec()))); - } - Packers::String(p) => { - let mut builder = array::StringBuilder::new(p.num_rows()); - for v in p.values() { - match v { - Some(v) => { - builder.append_value(v.as_utf8().unwrap()).unwrap(); - } - None => { - builder.append_null().unwrap(); - } - } - } - let array = builder.finish(); - record_batch_arrays.push(Arc::new(array)); - } - Packers::Boolean(p) => { - let array = array::BooleanArray::from(p.values().to_vec()); - record_batch_arrays.push(Arc::new(array)); - } - } - } +// for packer in packers { +// match packer { +// Packers::Float(p) => { +// record_batch_arrays.push(Arc::new(array::Float64Array::from(p.values().to_vec()))); +// } +// Packers::Integer(p) => { +// record_batch_arrays.push(Arc::new(array::Int64Array::from(p.values().to_vec()))); +// } +// Packers::String(p) => { +// let mut builder = array::StringBuilder::new(p.num_rows()); +// for v in p.values() { +// match v { +// Some(v) => { +// builder.append_value(v.as_utf8().unwrap()).unwrap(); +// } +// None => { +// builder.append_null().unwrap(); +// } +// } +// } +// let array = builder.finish(); +// record_batch_arrays.push(Arc::new(array)); +// } +// Packers::Boolean(p) => { +// let array = array::BooleanArray::from(p.values().to_vec()); +// record_batch_arrays.push(Arc::new(array)); +// } +// } +// } - let record_batch = record_batch::RecordBatch::try_new(schema, record_batch_arrays).unwrap(); - w.write(&record_batch).unwrap(); -} +// 
let record_batch = record_batch::RecordBatch::try_new(schema, record_batch_arrays).unwrap(); +// w.write(&record_batch).unwrap(); +// } /// Converts one or more TSM files into the delorean_table internal columnar /// data format and then passes that converted data to a `DeloreanTableWriter`. From 27e81fab2da089780a1d8980af8265b7394947d5 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 11:24:32 +0100 Subject: [PATCH 70/73] refactor: rework values/scalars --- delorean_mem_qe/src/bin/main.rs | 2 +- delorean_mem_qe/src/column.rs | 202 +++++++++++++++----------------- delorean_mem_qe/src/segment.rs | 96 +++++++-------- 3 files changed, 137 insertions(+), 163 deletions(-) diff --git a/delorean_mem_qe/src/bin/main.rs b/delorean_mem_qe/src/bin/main.rs index 7b5e26c42b..9c039e2fee 100644 --- a/delorean_mem_qe/src/bin/main.rs +++ b/delorean_mem_qe/src/bin/main.rs @@ -380,7 +380,7 @@ fn time_select_with_pred(store: &Store) { let columns = segments.read_filter_eq( (1590036110000000, 1590040770000000), - &[("env", Some(column::Scalar::String("prod01-eu-central-1")))], + &[("env", "prod01-eu-central-1")], vec![ "env".to_string(), "method".to_string(), diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index d03eb0c6e1..25de6af868 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -15,23 +15,19 @@ pub enum Set<'a> { pub enum Value<'a> { Null, String(&'a str), - Scalar(Scalar<'a>), + Scalar(Scalar), } #[derive(Debug, PartialEq, PartialOrd, Clone)] -pub enum Scalar<'a> { - String(&'a str), +pub enum Scalar { Float(f64), Integer(i64), Unsigned32(u32), } -impl<'a> Scalar<'a> { +impl Scalar { pub fn reset(&mut self) { match self { - Scalar::String(_s) => { - panic!("not supported"); - } Scalar::Float(v) => { *v = 0.0; } @@ -44,7 +40,7 @@ impl<'a> Scalar<'a> { } } - pub fn add(&mut self, other: Scalar<'a>) { + pub fn add(&mut self, other: Scalar) { match self { Self::Float(v) => { if let Self::Float(other) = other { @@ -67,17 +63,14 @@ impl<'a> Scalar<'a> { panic!("invalid"); }; } - Self::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::Add<&Scalar<'a>> for &mut Scalar<'a> { - type Output = Scalar<'a>; +impl<'a> std::ops::Add<&Scalar> for &mut Scalar { + type Output = Scalar; - fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + fn add(self, _rhs: &Scalar) -> Self::Output { match *self { Scalar::Float(v) => { if let Scalar::Float(other) = _rhs { @@ -100,17 +93,14 @@ impl<'a> std::ops::Add<&Scalar<'a>> for &mut Scalar<'a> { panic!("invalid"); } } - Scalar::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { - type Output = Scalar<'a>; +impl<'a> std::ops::Add<&Scalar> for Scalar { + type Output = Scalar; - fn add(self, _rhs: &Scalar<'a>) -> Self::Output { + fn add(self, _rhs: &Scalar) -> Self::Output { match self { Self::Float(v) => { if let Self::Float(other) = _rhs { @@ -133,15 +123,12 @@ impl<'a> std::ops::Add<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); } } - Self::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::AddAssign<&Scalar<'a>> for &mut Scalar<'a> { - fn add_assign(&mut self, _rhs: &Scalar<'a>) { +impl<'a> std::ops::AddAssign<&Scalar> for &mut Scalar { + fn add_assign(&mut self, _rhs: &Scalar) { match self { Scalar::Float(v) => { if let Scalar::Float(other) = _rhs { @@ -164,15 +151,12 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for &mut Scalar<'a> { 
panic!("invalid"); }; } - Scalar::String(_) => { - unreachable!("not possible to add strings"); - } } } } -impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { - fn add_assign(&mut self, _rhs: &Scalar<'a>) { +impl<'a> std::ops::AddAssign<&Scalar> for Scalar { + fn add_assign(&mut self, _rhs: &Scalar) { match self { Self::Float(v) => { if let Self::Float(other) = _rhs { @@ -195,19 +179,16 @@ impl<'a> std::ops::AddAssign<&Scalar<'a>> for Scalar<'a> { panic!("invalid"); }; } - Self::String(_) => { - unreachable!("not possible to add strings"); - } } } } #[derive(Clone, Debug)] -pub enum Aggregate<'a> { +pub enum Aggregate { Count(u64), // Sum can be `None` is for example all values being aggregated are themselves // `None`. - Sum(Option>), + Sum(Option), } #[derive(Debug, Clone)] @@ -216,10 +197,10 @@ pub enum AggregateType { Sum, } -// impl<'a> std::ops::Add<&Option>> for Aggregate<'a> { -// type Output = Aggregate<'a>; +// impl<'a> std::ops::Add<&Option> for Aggregate { +// type Output = Aggregate; -// fn add(self, _rhs: &Option>) -> Self::Output { +// fn add(self, _rhs: &Option) -> Self::Output { // match self { // Self::Count(self_count) => match _rhs { // Some(other_scalar) => match other_scalar { @@ -252,10 +233,10 @@ pub enum AggregateType { // } // } -// impl<'a> std::ops::Add<&Aggregate<'a>> for Aggregate<'a> { -// type Output = Aggregate<'a>; +// impl<'a> std::ops::Add<&Aggregate> for Aggregate { +// type Output = Aggregate; -// fn add(self, _rhs: &Aggregate<'a>) -> Self::Output { +// fn add(self, _rhs: &Aggregate) -> Self::Output { // match self { // Self::Count(self_count) => { // if let Self::Count(other) = _rhs { @@ -288,7 +269,7 @@ pub trait AggregatableByRange { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_>; + ) -> Aggregate; } /// A Vector is a materialised vector of values from a column. @@ -317,7 +298,7 @@ impl<'a> Vector<'a> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'a> { + ) -> Aggregate { match agg_type { AggregateType::Count => { Aggregate::Count(self.count_by_id_range(from_row_id, to_row_id)) @@ -328,7 +309,7 @@ impl<'a> Vector<'a> { // Return the sum of values in the vector. NULL values are ignored. If there // are no non-null values in the vector being summed then None is returned. - fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option> { + fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { match self { Self::NullString(_) => { panic!("can't sum strings...."); @@ -528,7 +509,7 @@ impl<'a> Vector<'a> { /// position `i` is NULL then `None` is returned. // // TODO - sort out - pub fn get_scalar(&self, i: usize) -> Option> { + pub fn get_scalar(&self, i: usize) -> Option { match self { Self::NullString(_) => panic!("unsupported get_scalar"), Self::NullFloat(v) => match v[i] { @@ -575,7 +556,7 @@ impl AggregatableByRange for &Vector<'_> { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_> { + ) -> Aggregate { Vector::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } @@ -621,7 +602,7 @@ impl<'a> NullVectorIterator<'a> { } } impl<'a> Iterator for NullVectorIterator<'a> { - type Item = Option>; + type Item = Option; fn next(&mut self) -> Option { let curr_i = self.next_i; @@ -696,39 +677,39 @@ impl Column { /// Materialise the decoded value matching the provided logical /// row id. 
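The net effect of the Scalar/Value rework in this patch, condensed into a compilable sketch: Scalar keeps only the numeric, summable variants, while Value layers NULL and borrowed strings on top, so the "not possible to add strings" arms disappear by construction. The enum names mirror the diff; the add method is a simplified stand-in for the operator impls:

#[derive(Debug, Clone, Copy, PartialEq)]
enum Scalar {
    Float(f64),
    Integer(i64),
    Unsigned32(u32),
}

#[derive(Debug, Clone, PartialEq)]
enum Value<'a> {
    Null,
    String(&'a str), // borrowed, never participates in arithmetic
    Scalar(Scalar),
}

impl Scalar {
    fn add(&mut self, other: Scalar) {
        match (self, other) {
            (Scalar::Float(a), Scalar::Float(b)) => *a += b,
            (Scalar::Integer(a), Scalar::Integer(b)) => *a += b,
            (Scalar::Unsigned32(a), Scalar::Unsigned32(b)) => *a += b,
            _ => panic!("invalid: mismatched scalar types"),
        }
    }
}

With this split, the reworked value(row_id) below can return Value::Null, Value::String or Value::Scalar directly instead of forcing callers through an Option over a string-carrying Scalar.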
- pub fn value(&self, row_id: usize) -> Option> { + pub fn value<'a>(&'a self, row_id: usize) -> Value<'a> { match self { Column::String(c) => { if row_id >= self.num_rows() { - return None; + return Value::Null; } match c.value(row_id) { - Some(v) => Some(Scalar::String(v)), - None => None, + Some(v) => Value::String(v), + None => Value::Null, } } Column::Float(c) => { if row_id >= self.num_rows() { - return None; + return Value::Null; } let v = c.value(row_id); if let Some(v) = v { - return Some(Scalar::Float(v)); + return Value::Scalar(Scalar::Float(v)); } - None + Value::Null } Column::Integer(c) => { if row_id >= self.num_rows() { - return None; + return Value::Null; } let v = c.value(row_id); if let Some(v) = v { - return Some(Scalar::Integer(v)); + return Value::Scalar(Scalar::Integer(v)); } - None + Value::Null } } } @@ -949,37 +930,37 @@ impl Column { } } - pub fn maybe_contains(&self, value: &Scalar<'_>) -> bool { + pub fn maybe_contains(&self, value: &Value<'_>) -> bool { match self { Column::String(c) => { - if let Scalar::String(v) = value { + if let Value::String(v) = value { c.meta.maybe_contains_value(v.to_string()) + } else { + panic!("a String column cannot contain a non-string value"); + } + } + Column::Float(c) => { + if let Value::Scalar(Scalar::Float(v)) = value { + c.meta.maybe_contains_value(*v) } else { panic!("invalid value"); } } - Column::Float(c) => { - if let Scalar::Float(v) = value { - c.meta.maybe_contains_value(*v) - } else { - panic!("invalid value or unsupported null"); - } - } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { c.meta.maybe_contains_value(*v) } else { - panic!("invalid value or unsupported null"); + panic!("invalid value"); } } } } /// returns true if the column cannot contain - pub fn max_less_than(&self, value: &Scalar<'_>) -> bool { + pub fn max_less_than(&self, value: &Value<'_>) -> bool { match self { Column::String(c) => { - if let Scalar::String(v) = value { + if let Value::String(v) = value { if let Some(range) = c.meta.range() { &range.1.as_str() < v } else { @@ -990,7 +971,7 @@ impl Column { } } Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Value::Scalar(Scalar::Float(v)) = value { if let Some(range) = c.meta.range() { range.1 < *v } else { @@ -1001,7 +982,7 @@ impl Column { } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { if let Some(range) = c.meta.range() { range.1 < *v } else { @@ -1015,10 +996,10 @@ impl Column { } // TODO(edd): consolodate with max_less_than... Should just be single cmp function - pub fn min_greater_than(&self, value: &Scalar<'_>) -> bool { + pub fn min_greater_than(&self, value: &Value<'_>) -> bool { match self { Column::String(c) => { - if let Scalar::String(v) = value { + if let Value::String(v) = value { if let Some(range) = c.meta.range() { &range.0.as_str() > v } else { @@ -1029,7 +1010,7 @@ impl Column { } } Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Value::Scalar(Scalar::Float(v)) = value { if let Some(range) = c.meta.range() { range.0 > *v } else { @@ -1040,7 +1021,7 @@ impl Column { } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { if let Some(range) = c.meta.range() { range.0 > *v } else { @@ -1054,43 +1035,43 @@ impl Column { } /// Returns the minimum value contained within this column. 
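All three checks above reduce to comparisons against the column's (min, max) metadata, which is what lets a segment be skipped without scanning any rows. The same logic as a generic sketch, assuming a simplified Meta type in place of the real code's per-Column-variant dispatch and type-mismatch panics:

struct Meta<T> {
    range: Option<(T, T)>, // None when the column has no non-null values
}

impl<T: PartialOrd> Meta<T> {
    // The column may contain v only if v lies within [min, max].
    fn maybe_contains(&self, v: &T) -> bool {
        match &self.range {
            Some((min, max)) => min <= v && v <= max,
            None => false,
        }
    }

    // Every stored value is < v, so no row can satisfy col >= v.
    fn max_less_than(&self, v: &T) -> bool {
        matches!(&self.range, Some((_, max)) if max < v)
    }

    // Every stored value is > v, so no row can satisfy col <= v.
    fn min_greater_than(&self, v: &T) -> bool {
        matches!(&self.range, Some((min, _)) if min > v)
    }
}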
- pub fn min(&self) -> Option> { + pub fn min(&self) -> Value<'_> { match self { Column::String(c) => match c.meta.range() { - Some(range) => Some(Scalar::String(&range.0)), - None => None, + Some(range) => Value::String(&range.0), + None => Value::Null, }, Column::Float(c) => match c.meta.range() { - Some(range) => Some(Scalar::Float(range.0)), - None => None, + Some(range) => Value::Scalar(Scalar::Float(range.0)), + None => Value::Null, }, Column::Integer(c) => match c.meta.range() { - Some(range) => Some(Scalar::Integer(range.0)), - None => None, + Some(range) => Value::Scalar(Scalar::Integer(range.0)), + None => Value::Null, }, } } /// Returns the maximum value contained within this column. // FIXME(edd): Support NULL integers and floats - pub fn max(&self) -> Option> { + pub fn max(&self) -> Value<'_> { match self { Column::String(c) => match c.meta.range() { - Some(range) => Some(Scalar::String(&range.1)), - None => None, + Some(range) => Value::String(&range.1), + None => Value::Null, }, Column::Float(c) => match c.meta.range() { - Some(range) => Some(Scalar::Float(range.1)), - None => None, + Some(range) => Value::Scalar(Scalar::Float(range.1)), + None => Value::Null, }, Column::Integer(c) => match c.meta.range() { - Some(range) => Some(Scalar::Integer(range.1)), - None => None, + Some(range) => Value::Scalar(Scalar::Integer(range.1)), + None => Value::Null, }, } } - pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option> { + pub fn sum_by_ids(&self, row_ids: &mut croaring::Bitmap) -> Option { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match c.sum_by_ids(row_ids) { @@ -1106,7 +1087,7 @@ impl Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_> { + ) -> Aggregate { match self { Column::String(_) => unimplemented!("not implemented"), Column::Float(c) => match agg_type { @@ -1132,11 +1113,10 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - pub fn row_ids_eq(&self, value: &Option>) -> Option { - let value = match value { - Some(v) => v, - None => return None, - }; + pub fn row_ids_eq(&self, value: &Value<'_>) -> Option { + if let Value::Null = value { + return None; // don't support "IS NULL" yet. + } if !self.maybe_contains(value) { return None; @@ -1144,14 +1124,14 @@ impl Column { self.row_ids(value, std::cmp::Ordering::Equal) } - pub fn row_ids_gt(&self, value: &Scalar<'_>) -> Option { + pub fn row_ids_gt(&self, value: &Value<'_>) -> Option { if self.max_less_than(value) { return None; } self.row_ids(value, std::cmp::Ordering::Greater) } - pub fn row_ids_lt(&self, value: &Scalar<'_>) -> Option { + pub fn row_ids_lt(&self, value: &Value<'_>) -> Option { if self.min_greater_than(value) { return None; } @@ -1164,7 +1144,11 @@ impl Column { // or // // WHERE counter >= 102.2 AND counter < 2929.32 - pub fn row_ids_gte_lt(&self, low: &Scalar<'_>, high: &Scalar<'_>) -> Option { + pub fn row_ids_gte_lt(&self, low: &Value<'_>, high: &Value<'_>) -> Option { + if let (Value::Null, _) | (_, Value::Null) = (low, high) { + panic!("unsupported NULL value in range"); + } + match self { Column::String(_c) => { unimplemented!("not implemented yet"); @@ -1176,7 +1160,9 @@ impl Column { None => return None, }; - if let (Scalar::Float(low), Scalar::Float(high)) = (low, high) { + if let (Value::Scalar(Scalar::Float(low)), Value::Scalar(Scalar::Float(high))) = + (low, high) + { if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. 
// TODO: PERF - need to _not_ return a bitset rather than @@ -1204,7 +1190,9 @@ impl Column { None => return None, }; - if let (Scalar::Integer(low), Scalar::Integer(high)) = (low, high) { + if let (Value::Scalar(Scalar::Integer(low)), Value::Scalar(Scalar::Integer(high))) = + (low, high) + { if low <= col_min && high > col_max { // In this case the query completely covers the range of the column. // TODO: PERF - need to _not_ return a bitset rather than @@ -1229,28 +1217,28 @@ impl Column { } // TODO(edd) shouldn't let roaring stuff leak out... - fn row_ids(&self, value: &Scalar<'_>, order: std::cmp::Ordering) -> Option { + fn row_ids(&self, value: &Value<'_>, order: std::cmp::Ordering) -> Option { match self { Column::String(c) => { if order != std::cmp::Ordering::Equal { unimplemented!("> < not supported on strings yet"); } - if let Scalar::String(v) = value { + if let Value::String(v) = value { Some(c.data.row_ids_eq_roaring(Some(v.to_string()))) } else { panic!("invalid value"); } } Column::Float(c) => { - if let Scalar::Float(v) = value { + if let Value::Scalar(Scalar::Float(v)) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); } } Column::Integer(c) => { - if let Scalar::Integer(v) = value { + if let Value::Scalar(Scalar::Integer(v)) = value { Some(c.data.row_ids_single_cmp_roaring(v, order)) } else { panic!("invalid value or unsupported null"); @@ -1328,7 +1316,7 @@ impl AggregatableByRange for &Column { agg_type: &AggregateType, from_row_id: usize, to_row_id: usize, - ) -> Aggregate<'_> { + ) -> Aggregate { Column::aggregate_by_id_range(&self, agg_type, from_row_id, to_row_id) } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 66a6299a74..3cc74aa03c 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -260,11 +260,11 @@ impl Segment { pub fn aggregate_by_group_with_hash<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate<'a>)>> { + ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. 
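As a reference for the strategy the comment above describes, here is a minimal sketch of hash-based grouping with running aggregates. The types are simplified stand-ins: `Vec<i64>` for the encoded group key and a toy `Agg` enum in place of `column::Aggregate`.

```rust
use std::collections::HashMap;

// Simplified stand-in for `column::Aggregate`.
#[derive(Debug)]
enum Agg {
    Count(u64),
    Sum(f64),
}

// Group rows by encoded key, maintaining running aggregates per group.
fn hash_group(rows: &[(Vec<i64>, f64)]) -> HashMap<Vec<i64>, Vec<Agg>> {
    let mut table: HashMap<Vec<i64>, Vec<Agg>> = HashMap::new();
    for (key, value) in rows {
        let aggs = table
            .entry(key.clone())
            .or_insert_with(|| vec![Agg::Count(0), Agg::Sum(0.0)]);
        for agg in aggs.iter_mut() {
            match agg {
                Agg::Count(c) => *c += 1,
                Agg::Sum(s) => *s += *value,
            }
        }
    }
    table
}
```

Note that the segment implementation below deliberately checks `contains_key` before inserting rather than using the entry API shown here, trading a second lookup for not having to clone the group key on every row.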
@@ -375,10 +375,10 @@ impl Segment { // hashMap is about 20% faster than BTreeMap in this case let mut hash_table: BTreeMap< Vec, - Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)>, + Vec<(&'a String, &'a AggregateType, column::Aggregate)>, > = BTreeMap::new(); - let mut aggregate_row: Vec<(&str, Option>)> = + let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) .take(aggregate_itrs.len()) .collect(); @@ -424,7 +424,7 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate<'_>)> = + let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push(( @@ -497,7 +497,7 @@ impl Segment { pub fn aggregate_by_group_using_sort( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -642,7 +642,7 @@ impl Segment { pub fn aggregate_by_group_using_stream<'a>( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, @@ -870,11 +870,7 @@ impl Segment { vec![] } - pub fn sum_column( - &self, - name: &str, - row_ids: &mut croaring::Bitmap, - ) -> Option> { + pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { if let Some(c) = self.column(name) { return c.sum_by_ids(row_ids); } @@ -892,7 +888,7 @@ impl Segment { pub fn filter_by_predicates_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], ) -> Option { if !self.meta.overlaps_time_range(time_range.0, time_range.1) { return None; // segment doesn't have time range @@ -904,27 +900,27 @@ impl Segment { // so don't need to intersect predicate results with time column. return self.filter_by_predicates_eq_no_time(predicates); } - self.filter_by_predicates_eq_time(time_range, predicates.to_vec()) + self.filter_by_predicates_eq_time(time_range, predicates) } fn filter_by_predicates_eq_time( &self, time_range: (i64, i64), - predicates: Vec<(&str, Option>)>, + predicates: &[(&str, &str)], ) -> Option { // Get all row_ids matching the time range: // // time > time_range.0 AND time < time_range.1 let mut bm = self.columns[self.time_column_idx].row_ids_gte_lt( - &column::Scalar::Integer(time_range.0), - &column::Scalar::Integer(time_range.1), + &column::Value::Scalar(column::Scalar::Integer(time_range.0)), + &column::Value::Scalar(column::Scalar::Integer(time_range.1)), )?; log::debug!("time col bitmap contains {:?} values out of {:?} rows. requested range was {:?}, meta range is {:?}",bm.cardinality(),self.num_rows(), time_range, self.meta.time_range); // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(&col_pred_value) { + match c.row_ids_eq(&column::Value::String(col_pred_value)) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -949,7 +945,7 @@ impl Segment { // meta row_ids bitmap. 
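The intersection loop above condenses to the following sketch, assuming the `croaring` crate already used in this module (only the `and` and `is_empty` operations that appear in the surrounding code are relied on); `bm` starts out holding the row ids matching the time range.

```rust
// Intersect the time-range row ids with each predicate's row ids,
// bailing out as soon as the intersection is provably empty.
fn intersect_predicates(
    mut bm: croaring::Bitmap,
    per_predicate_row_ids: Vec<Option<croaring::Bitmap>>,
) -> Option<croaring::Bitmap> {
    for row_ids in per_predicate_row_ids {
        match row_ids {
            Some(ids) if !ids.is_empty() => {
                bm = bm.and(&ids);
                if bm.is_empty() {
                    return None; // intersection ran dry; no rows match
                }
            }
            _ => return None, // a predicate matched no rows at all
        }
    }
    Some(bm)
}
```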
fn filter_by_predicates_eq_no_time( &self, - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], ) -> Option { if predicates.is_empty() { // In this case there are no predicates provided and we have no time @@ -963,7 +959,7 @@ impl Segment { // now intersect matching rows for each column for (col_pred_name, col_pred_value) in predicates { if let Some(c) = self.column(col_pred_name) { - match c.row_ids_eq(col_pred_value) { + match c.row_ids_eq(&column::Value::String(col_pred_value)) { Some(row_ids) => { if row_ids.is_empty() { return None; @@ -992,10 +988,10 @@ impl Segment { pub fn group_single_agg_by_predicate_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_column: &str, aggregates: &[(String, column::AggregateType)], - ) -> BTreeMap)>> { + ) -> BTreeMap> { let mut grouped_results = BTreeMap::new(); let filter_row_ids: croaring::Bitmap; @@ -1011,7 +1007,7 @@ impl Segment { let mut filtered_row_ids = row_ids.and(&filter_row_ids); if !filtered_row_ids.is_empty() { // First calculate all of the aggregates for this grouped value - let mut aggs: Vec<((String, AggregateType), column::Aggregate<'_>)> = + let mut aggs: Vec<((String, AggregateType), column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg) in aggregates { @@ -1083,13 +1079,8 @@ impl Segment { todo!("fast path") } - let pred_vec = predicates - .iter() - .map(|p| (p.0, Some(column::Scalar::String(p.1)))) - .collect::>(); - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { return None; // no matching rows for predicate + time range @@ -1159,13 +1150,8 @@ impl Segment { todo!("fast path") } - let pred_vec = predicates - .iter() - .map(|p| (p.0, Some(column::Scalar::String(p.1)))) - .collect::>(); - let filtered_row_ids: croaring::Bitmap; - if let Some(row_ids) = self.filter_by_predicates_eq(time_range, pred_vec.as_slice()) { + if let Some(row_ids) = self.filter_by_predicates_eq(time_range, predicates) { filtered_row_ids = row_ids; } else { return None; // no matching rows for predicate + time range @@ -1302,7 +1288,7 @@ impl<'a> Segments<'a> { pub fn read_filter_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], select_columns: Vec, ) -> BTreeMap> { let (min, max) = time_range; @@ -1338,12 +1324,12 @@ impl<'a> Segments<'a> { pub fn read_group_eq( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, strategy: &GroupingStrategy, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { let (min, max) = time_range; if max <= min { panic!("max <= min"); @@ -1388,12 +1374,12 @@ impl<'a> Segments<'a> { fn read_group_eq_hash( &self, time_range: (i64, i64), - predicates: &[(&str, Option>)], + predicates: &[(&str, &str)], mut group_columns: Vec, aggregates: Vec<(String, AggregateType)>, window: i64, concurrent: bool, - ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> { + ) -> BTreeMap, Vec<((String, column::AggregateType), column::Aggregate)>> { if window > 0 { // add time column to the group key group_columns.push("time".to_string()); @@ -1474,12 +1460,12 @@ impl<'a> 
Segments<'a> {
     fn read_group_eq_sort(
         &self,
         time_range: (i64, i64),
-        predicates: &[(&str, Option<column::Scalar<'a>>)],
+        predicates: &[(&str, &str)],
         mut group_columns: Vec<String>,
         aggregates: Vec<(String, AggregateType)>,
         window: i64,
         concurrent: bool,
-    ) -> BTreeMap<Vec<i64>, Vec<((String, column::AggregateType), column::Aggregate<'a>)>> {
+    ) -> BTreeMap<Vec<i64>, Vec<((String, column::AggregateType), column::Aggregate)>> {
         if window > 0 {
             // add time column to the group key
             group_columns.push("time".to_string());
@@ -1611,16 +1597,16 @@ impl<'a> Segments<'a> {
     }
 
     /// Returns the minimum value for a column in a set of segments.
-    pub fn column_min(&self, column_name: &str) -> Option<column::Scalar<'_>> {
+    pub fn column_min(&self, column_name: &str) -> column::Value<'_> {
         if self.segments.is_empty() {
-            return None;
+            return column::Value::Null;
         }
 
-        let mut min_min: Option<column::Scalar<'_>> = None;
+        let mut min_min = column::Value::Null;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let min = segment.columns[i].min();
-                if min_min.is_none() {
+                if let column::Value::Null = min_min {
                     min_min = min
                 } else if min_min > min {
                     min_min = min;
@@ -1632,17 +1618,17 @@ impl<'a> Segments<'a> {
     }
 
     /// Returns the maximum value for a column in a set of segments.
-    pub fn column_max(&self, column_name: &str) -> Option<column::Scalar<'_>> {
+    pub fn column_max(&self, column_name: &str) -> column::Value<'_> {
         if self.segments.is_empty() {
-            return None;
+            return column::Value::Null;
         }
 
-        let mut max_max: Option<column::Scalar<'_>> = None;
         for segment in &self.segments {
             if let Some(i) = segment.column_names().iter().position(|c| c == column_name) {
                 let max = segment.columns[i].max();
-                if max_max.is_none() {
-                    max_max = max
+                if let column::Value::Null = max_max {
+                    max_max = max;
                 } else if max_max < max {
                     max_max = max;
                 }
@@ -1660,7 +1646,7 @@ impl<'a> Segments<'a> {
     /// If the time column has multiple max time values then the result is arbitrary.
     ///
    /// TODO(edd): could return NULL value..
-    pub fn first(&self, column_name: &str) -> Option<(i64, Option<column::Scalar<'_>>, usize)> {
+    pub fn first(&self, column_name: &str) -> Option<(i64, column::Value<'_>, usize)> {
         // First let's find the segment with the earliest time range.
         // notice we order a < b on max time range.
         let segment = self
@@ -1689,10 +1675,10 @@ impl<'a> Segments<'a> {
     /// The last value is based on the time column, therefore the returned value
     /// may not be at the end of the column.
     ///
-    /// If the time column has multiple max time values then the result is abitrary.
+    /// If the time column has multiple max time values then the result is undefined.
     ///
     /// TODO(edd): could return NULL value..
-    pub fn last(&self, column_name: &str) -> Option<(i64, Option<column::Scalar<'_>>, usize)> {
+    pub fn last(&self, column_name: &str) -> Option<(i64, column::Value<'_>, usize)> {
         // First let's find the segment with the latest time range.
         // notice we order a > b on max time range.
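The Null-as-sentinel fold used by `column_min` and `column_max` above, reduced to a standalone sketch with a simplified `Value` type (the real enum also carries string and integer variants):

```rust
// Fold per-segment minimums into a global minimum, treating Null as
// "no value seen yet" rather than as a comparable value.
#[derive(Debug, PartialEq, PartialOrd)]
enum Value {
    Null,
    Scalar(f64),
}

fn fold_min(segment_mins: impl IntoIterator<Item = Value>) -> Value {
    let mut min_min = Value::Null;
    for min in segment_mins {
        if min == Value::Null {
            continue; // segment had no values for this column
        }
        if min_min == Value::Null || min < min_min {
            min_min = min;
        }
    }
    min_min
    // e.g. fold_min(vec![Value::Scalar(3.0), Value::Null, Value::Scalar(1.5)])
    //      == Value::Scalar(1.5)
}
```

Skipping Null per-segment minimums matters in this sketch: with the derived ordering, Null compares less than any scalar, so folding one in directly would clobber a genuine minimum.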
let segment = self @@ -1804,7 +1790,7 @@ pub enum GroupingStrategy { #[derive(Debug)] pub struct GroupedAggregates<'a> { pub group_key: Vec, - pub aggregates: Vec<(&'a String, column::Aggregate<'a>)>, + pub aggregates: Vec<(&'a String, column::Aggregate)>, } #[cfg(test)] From 635121c8013ac70a84db1e91bf1736f56bbd5d47 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 11:28:35 +0100 Subject: [PATCH 71/73] refactor: fix lifetime issue in hashgroup --- delorean_mem_qe/src/segment.rs | 74 ++++------------------------------ 1 file changed, 7 insertions(+), 67 deletions(-) diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 3cc74aa03c..216ee2adf3 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -264,7 +264,7 @@ impl Segment { group_columns: &[String], aggregates: &'a [(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, &'a AggregateType, column::Aggregate)>> { + ) -> BTreeMap, Vec<(&'a String, column::Aggregate)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -318,7 +318,6 @@ impl Segment { panic!("need to handle no results for filtering/grouping..."); } } - // println!("grouped columns {:?}", group_column_encoded_values); // TODO(edd): we could do this with an iterator I expect. // @@ -373,10 +372,8 @@ impl Segment { .collect::>(); // hashMap is about 20% faster than BTreeMap in this case - let mut hash_table: BTreeMap< - Vec, - Vec<(&'a String, &'a AggregateType, column::Aggregate)>, - > = BTreeMap::new(); + let mut hash_table: BTreeMap, Vec<(&'a String, column::Aggregate)>> = + BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -424,12 +421,11 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&'a String, &'a AggregateType, column::Aggregate)> = + let mut agg_results: Vec<(&'a String, column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push(( col_name, - agg_type, match agg_type { AggregateType::Count => column::Aggregate::Count(0), AggregateType::Sum => column::Aggregate::Sum(None), @@ -445,7 +441,7 @@ impl Segment { // // TODO(edd): this is probably a bit of a perf suck. 
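The merge step that follows (matching each row value in `aggregate_row` against the cumulative aggregates) reduces to NULL-aware updates along these lines; as with the `Aggregate::Sum(None)` initialisation above, a sum stays `None` until the first non-null value arrives. A simplified `f64`-only sketch:

```rust
// Simplified stand-in for `column::Aggregate`: counts ignore NULLs
// (mirroring the count-by-range logic in the column code), and a sum
// remains None until the first non-null value is merged in.
#[derive(Debug)]
enum Aggregate {
    Count(u64),
    Sum(Option<f64>),
}

fn update(agg: &mut Aggregate, row_value: Option<f64>) {
    match agg {
        Aggregate::Count(c) => {
            if row_value.is_some() {
                *c += 1;
            }
        }
        Aggregate::Sum(sum) => {
            if let Some(v) = row_value {
                match sum {
                    Some(s) => *s += v,
                    None => *sum = Some(v),
                }
            }
        }
    }
}
```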
for (col_name, row_value) in &aggregate_row { - for &mut (cum_col_name, _, ref mut cum_agg_value) in group_key_entry.iter_mut() { + for &mut (cum_col_name, ref mut cum_agg_value) in group_key_entry.iter_mut() { if col_name != cum_col_name { continue; } @@ -465,33 +461,13 @@ impl Segment { } } } - // match cum_agg_value { - // Some(agg) => match agg { - // column::Aggregate::Count(_) => { - // *cum_agg_value = Some(agg + column::Aggregate::Count(Some(1))); - // } - // column::Aggregate::Sum(cum_sum) => { - // *cum_sum += row_value; - // } - // }, - // None => { - // *cum_agg_value = match agg_type { - // AggregateType::Count => Some(column::Aggregate::Count(Some(0))), - // AggregateType::Sum => { - // Some(column::Aggregate::Sum(row_value.clone())) - // } - // } - // } - // } } } processed_rows += 1; } - // println!("groups: {:?}", hash_table.len()); - log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - BTreeMap::new() - // hash_table + log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); + hash_table } pub fn aggregate_by_group_using_sort( @@ -597,18 +573,6 @@ impl Segment { let group_col_sort_order = &(0..group_columns.len()).collect::>(); super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - // let group_itrs = all_columns - // .iter() - // .take(group_columns.len()) // only use grouping columns - // .map(|vector| { - // if let column::Vector::Integer(v) = vector { - // v.iter() - // } else { - // panic!("don't support grouping on non-encoded values"); - // } - // }) - // .collect::>(); - let group_itrs = all_columns .iter() .take(group_columns.len()) @@ -706,17 +670,6 @@ impl Segment { }) .collect::>(); - // let group_itrs = group_column_encoded_values - // .iter() - // .map(|vector| { - // if let column::Vector::Integer(v) = vector { - // v.iter() - // } else { - // panic!("don't support grouping on non-encoded values"); - // } - // }) - // .collect::>(); - let mut aggregate_cols = Vec::with_capacity(aggregates.len()); for (column_name, agg_type) in aggregates { aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); @@ -741,12 +694,6 @@ impl Segment { .iter_mut() .enumerate() .map(|(i, itr)| { - // if i == group_itrs_len - 1 && window > 0 { - // // time column - apply window function - // return itr.next().unwrap() / window * window; - // } - // *itr.next().unwrap() - if i == group_itrs_len - 1 && window > 0 { // time column - apply window function if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { @@ -785,12 +732,6 @@ impl Segment { .zip(group_itrs.iter_mut()) .enumerate() { - // let next_v = if i == group_itrs_len - 1 && window > 0 { - // // time column - apply window function - // itr.next().unwrap() / window * window - // } else { - // *itr.next().unwrap() - // }; let next_v = if i == group_itrs_len - 1 && window > 0 { // time column - apply window function if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { @@ -864,7 +805,6 @@ impl Segment { aggregates: group_key_aggregates, }); - // println!("groups: {:?}", results.len()); log::debug!("({:?} rows processed) {:?}", processed_rows, results); // results vec![] From 02f036914ca776cd872517869426fa8d5e134e32 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 11:47:11 +0100 Subject: [PATCH 72/73] refactor: vectors comprise scalars --- delorean_mem_qe/src/column.rs | 207 +++++++++++++++++----------------- 1 file changed, 104 insertions(+), 103 deletions(-) diff --git 
a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index 25de6af868..a3445fe8c7 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -272,9 +272,9 @@ pub trait AggregatableByRange { ) -> Aggregate; } -/// A Vector is a materialised vector of values from a column. -pub enum Vector<'a> { - NullString(Vec<&'a Option>), +/// A Vector is a materialised vector of scalar values from a column. +pub enum Vector { + // NullString(Vec<&'a Option>), NullFloat(Vec>), NullInteger(Vec>), @@ -292,7 +292,7 @@ pub enum Vector<'a> { // } -impl<'a> Vector<'a> { +impl Vector { pub fn aggregate_by_id_range( &self, agg_type: &AggregateType, @@ -311,9 +311,9 @@ impl<'a> Vector<'a> { // are no non-null values in the vector being summed then None is returned. fn sum_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> Option { match self { - Self::NullString(_) => { - panic!("can't sum strings...."); - } + // Self::NullString(_) => { + // panic!("can't sum strings...."); + // } Self::NullFloat(values) => { let mut res = 0.0; let mut found = false; // TODO(edd): check if this is faster than a match. @@ -382,15 +382,15 @@ impl<'a> Vector<'a> { // to the count. fn count_by_id_range(&self, from_row_id: usize, to_row_id: usize) -> u64 { match self { - Self::NullString(vec) => { - let mut count = 0; - for v in &vec[from_row_id..to_row_id] { - if v.is_some() { - count += 1; - } - } - count as u64 - } + // Self::NullString(vec) => { + // let mut count = 0; + // for v in &vec[from_row_id..to_row_id] { + // if v.is_some() { + // count += 1; + // } + // } + // count as u64 + // } Self::NullFloat(vec) => { let mut count = 0; for v in &vec[from_row_id..to_row_id] { @@ -423,13 +423,13 @@ impl<'a> Vector<'a> { pub fn extend(&mut self, other: Self) { match self { - Self::NullString(v) => { - if let Self::NullString(other) = other { - v.extend(other); - } else { - unreachable!("string can't be extended"); - } - } + // Self::NullString(v) => { + // if let Self::NullString(other) = other { + // v.extend(other); + // } else { + // unreachable!("string can't be extended"); + // } + // } Self::NullFloat(v) => { if let Self::NullFloat(other) = other { v.extend(other); @@ -474,7 +474,7 @@ impl<'a> Vector<'a> { pub fn len(&self) -> usize { match self { - Self::NullString(v) => v.len(), + // Self::NullString(v) => v.len(), Self::NullFloat(v) => v.len(), Self::NullInteger(v) => v.len(), Self::Float(v) => v.len(), @@ -485,33 +485,12 @@ impl<'a> Vector<'a> { /// Return the value within the vector at position `i`. If the value at /// position `i` is NULL then `None` is returned. - pub fn get(&self, i: usize) -> Value<'a> { + pub fn get(&self, i: usize) -> Option { match self { - Self::NullString(v) => match v[i] { - Some(v) => Value::String(v), - None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), - }, - Self::NullFloat(v) => match v[i] { - Some(v) => Value::Scalar(Scalar::Float(v)), - None => Value::Null, - }, - Self::NullInteger(v) => match v[i] { - Some(v) => Value::Scalar(Scalar::Integer(v)), - None => Value::Null, - }, - Self::Float(v) => Value::Scalar(Scalar::Float(v[i])), - Self::Integer(v) => Value::Scalar(Scalar::Integer(v[i])), - Self::Unsigned32(v) => Value::Scalar(Scalar::Unsigned32(v[i])), - } - } - - /// Return the value within the vector at position `i`. If the value at - /// position `i` is NULL then `None` is returned. 
- // - // TODO - sort out - pub fn get_scalar(&self, i: usize) -> Option { - match self { - Self::NullString(_) => panic!("unsupported get_scalar"), + // Self::NullString(v) => match v[i] { + // Some(v) => Value::String(v), + // None => Value::Null, // Scalar::String(v[i].as_ref().unwrap()), + // }, Self::NullFloat(v) => match v[i] { Some(v) => Some(Scalar::Float(v)), None => None, @@ -526,11 +505,32 @@ impl<'a> Vector<'a> { } } + /// Return the value within the vector at position `i`. If the value at + /// position `i` is NULL then `None` is returned. + // + // TODO - sort out + // pub fn get_scalar(&self, i: usize) -> Option { + // match self { + // Self::NullString(_) => panic!("unsupported get_scalar"), + // Self::NullFloat(v) => match v[i] { + // Some(v) => Some(Scalar::Float(v)), + // None => None, + // }, + // Self::NullInteger(v) => match v[i] { + // Some(v) => Some(Scalar::Integer(v)), + // None => None, + // }, + // Self::Float(v) => Some(Scalar::Float(v[i])), + // Self::Integer(v) => Some(Scalar::Integer(v[i])), + // Self::Unsigned32(v) => Some(Scalar::Unsigned32(v[i])), + // } + // } + pub fn swap(&mut self, a: usize, b: usize) { match self { - Self::NullString(v) => { - v.swap(a, b); - } + // Self::NullString(v) => { + // v.swap(a, b); + // } Self::NullFloat(v) => { v.swap(a, b); } @@ -550,7 +550,7 @@ impl<'a> Vector<'a> { } } -impl AggregatableByRange for &Vector<'_> { +impl AggregatableByRange for &Vector { fn aggregate_by_id_range( &self, agg_type: &AggregateType, @@ -561,18 +561,18 @@ impl AggregatableByRange for &Vector<'_> { } } -pub struct VectorIterator<'a> { - v: &'a Vector<'a>, +pub struct VectorIterator { + v: Vector, next_i: usize, } -impl<'a> VectorIterator<'a> { - pub fn new(v: &'a Vector<'a>) -> Self { +impl VectorIterator { + pub fn new(v: Vector) -> Self { Self { v, next_i: 0 } } } -impl<'a> Iterator for VectorIterator<'a> { - type Item = Value<'a>; +impl Iterator for VectorIterator { + type Item = Option; fn next(&mut self) -> Option { let curr_i = self.next_i; @@ -591,37 +591,36 @@ impl<'a> Iterator for VectorIterator<'a> { /// /// /// TODO - need to figure this out - currently only returns scalars -pub struct NullVectorIterator<'a> { - v: &'a Vector<'a>, - next_i: usize, -} +// pub struct NullVectorIterator { +// v: Vector, +// next_i: usize, +// } -impl<'a> NullVectorIterator<'a> { - pub fn new(v: &'a Vector<'a>) -> Self { - Self { v, next_i: 0 } - } -} -impl<'a> Iterator for NullVectorIterator<'a> { - type Item = Option; +// impl NullVectorIterator { +// pub fn new(v: Vector) -> Self { +// Self { v, next_i: 0 } +// } +// } +// impl Iterator for NullVectorIterator { +// type Item = Option; - fn next(&mut self) -> Option { - let curr_i = self.next_i; - self.next_i += 1; +// fn next(&mut self) -> Option { +// let curr_i = self.next_i; +// self.next_i += 1; - if curr_i == self.v.len() { - return None; - } - - Some(self.v.get_scalar(curr_i)) - } -} +// if curr_i == self.v.len() { +// return None; +// } +// Some(self.v.get_scalar(curr_i)) +// } +// } use chrono::prelude::*; -impl<'a> std::fmt::Display for Vector<'a> { +impl std::fmt::Display for Vector { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::NullString(v) => write!(f, "{:?}", v), + // Self::NullString(v) => write!(f, "{:?}", v), Self::NullFloat(v) => write!(f, "{:?}", v), Self::NullInteger(v) => { for x in v.iter() { @@ -723,14 +722,15 @@ impl Column { // which take up more memory and mean we can't do fast counts (since we need // to check each value is 
non-null). // - pub fn values(&self, row_ids: &[usize]) -> Vector<'_> { + pub fn values(&self, row_ids: &[usize]) -> Vector { match self { Column::String(c) => { - if row_ids.is_empty() { - return Vector::NullString(vec![]); - } + panic!("unsupported at the moment") + // if row_ids.is_empty() { + // return Vector::NullString(vec![]); + // } - Vector::NullString(c.values(row_ids)) + // Vector::NullString(c.values(row_ids)) } Column::Float(c) => { if row_ids.is_empty() { @@ -758,19 +758,20 @@ impl Column { /// Materialise all of the decoded values matching the provided logical /// row ids within the bitmap - pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { + pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { match self { Column::String(c) => { - if row_ids.is_empty() { - return Vector::NullString(vec![]); - } + unreachable!("unsupported at the moment"); + // if row_ids.is_empty() { + // return Vector::NullString(vec![]); + // } - let row_id_vec = row_ids - .to_vec() - .iter() - .map(|v| *v as usize) - .collect::>(); - Vector::NullString(c.values(&row_id_vec)) + // let row_id_vec = row_ids + // .to_vec() + // .iter() + // .map(|v| *v as usize) + // .collect::>(); + // Vector::NullString(c.values(&row_id_vec)) } Column::Float(c) => { if row_ids.is_empty() { @@ -801,7 +802,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. - pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { + pub fn encoded_values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -840,7 +841,7 @@ impl Column { /// Materialise all of the encoded values matching the provided logical /// row ids. - pub fn encoded_values(&self, row_ids: &[usize]) -> Vector<'_> { + pub fn encoded_values(&self, row_ids: &[usize]) -> Vector { match self { Column::String(c) => { if row_ids.is_empty() { @@ -872,7 +873,7 @@ impl Column { } /// Materialise all of the encoded values. 
- pub fn all_encoded_values(&self) -> Vector<'_> { + pub fn all_encoded_values(&self) -> Vector { match self { Column::String(c) => { let now = std::time::Instant::now(); @@ -908,7 +909,7 @@ impl Column { } /// materialise rows for each row_id - pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector<'_> { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -924,7 +925,7 @@ impl Column { row_ids_vec[0] ); match self { - Column::String(c) => Vector::NullString(c.values(&row_ids_vec)), + Column::String(c) => panic!("unsupported"), //Vector::NullString(c.values(&row_ids_vec)), Column::Float(c) => Vector::NullFloat(c.values(&row_ids_vec)), Column::Integer(c) => Vector::NullInteger(c.values(&row_ids_vec)), } From 068fc8fc77655b3085d6a388dcc53a7b5d2d78b3 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 25 Sep 2020 13:32:32 +0100 Subject: [PATCH 73/73] refactor: refactor lifetimes --- delorean_mem_qe/src/column.rs | 50 +++++++++++--- delorean_mem_qe/src/segment.rs | 123 ++++++++++++++++++--------------- delorean_mem_qe/src/sorter.rs | 12 ++-- 3 files changed, 115 insertions(+), 70 deletions(-) diff --git a/delorean_mem_qe/src/column.rs b/delorean_mem_qe/src/column.rs index a3445fe8c7..449252d92d 100644 --- a/delorean_mem_qe/src/column.rs +++ b/delorean_mem_qe/src/column.rs @@ -18,6 +18,40 @@ pub enum Value<'a> { Scalar(Scalar), } +pub enum Values<'a> { + String(Vec<&'a Option>), + Float(Vec>), + Integer(Vec>), +} + +impl Values<'_> { + pub fn extend(&mut self, other: Self) { + match self { + Self::String(v) => { + if let Self::String(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Float(v) => { + if let Self::Float(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + Self::Integer(v) => { + if let Self::Integer(other) = other { + v.extend(other); + } else { + unreachable!("string can't be extended"); + } + } + } + } +} + #[derive(Debug, PartialEq, PartialOrd, Clone)] pub enum Scalar { Float(f64), @@ -550,7 +584,7 @@ impl Vector { } } -impl AggregatableByRange for &Vector { +impl AggregatableByRange for Vector { fn aggregate_by_id_range( &self, agg_type: &AggregateType, @@ -676,7 +710,7 @@ impl Column { /// Materialise the decoded value matching the provided logical /// row id. 
- pub fn value<'a>(&'a self, row_id: usize) -> Value<'a> { + pub fn value(&'_ self, row_id: usize) -> Value<'_> { match self { Column::String(c) => { if row_id >= self.num_rows() { @@ -724,7 +758,7 @@ impl Column { // pub fn values(&self, row_ids: &[usize]) -> Vector { match self { - Column::String(c) => { + Column::String(_) => { panic!("unsupported at the moment") // if row_ids.is_empty() { // return Vector::NullString(vec![]); @@ -760,7 +794,7 @@ impl Column { /// row ids within the bitmap pub fn values_bitmap(&self, row_ids: &croaring::Bitmap) -> Vector { match self { - Column::String(c) => { + Column::String(_) => { unreachable!("unsupported at the moment"); // if row_ids.is_empty() { // return Vector::NullString(vec![]); @@ -909,7 +943,7 @@ impl Column { } /// materialise rows for each row_id - pub fn rows(&self, row_ids: &croaring::Bitmap) -> Vector { + pub fn rows(&self, row_ids: &croaring::Bitmap) -> Values<'_> { let now = std::time::Instant::now(); let row_ids_vec = row_ids .to_vec() @@ -925,9 +959,9 @@ impl Column { row_ids_vec[0] ); match self { - Column::String(c) => panic!("unsupported"), //Vector::NullString(c.values(&row_ids_vec)), - Column::Float(c) => Vector::NullFloat(c.values(&row_ids_vec)), - Column::Integer(c) => Vector::NullInteger(c.values(&row_ids_vec)), + Column::String(c) => Values::String(c.values(&row_ids_vec)), + Column::Float(c) => Values::Float(c.values(&row_ids_vec)), + Column::Integer(c) => Values::Integer(c.values(&row_ids_vec)), } } diff --git a/delorean_mem_qe/src/segment.rs b/delorean_mem_qe/src/segment.rs index 216ee2adf3..592b0bcc47 100644 --- a/delorean_mem_qe/src/segment.rs +++ b/delorean_mem_qe/src/segment.rs @@ -196,8 +196,8 @@ impl Segment { &self, row_ids: &croaring::Bitmap, columns: &[String], - ) -> BTreeMap> { - let mut rows: BTreeMap> = BTreeMap::new(); + ) -> BTreeMap> { + let mut rows = BTreeMap::new(); if row_ids.is_empty() { // nothing to return return rows; @@ -257,14 +257,14 @@ impl Segment { true } - pub fn aggregate_by_group_with_hash<'a>( + pub fn aggregate_by_group_with_hash( &self, time_range: (i64, i64), predicates: &[(&str, &str)], group_columns: &[String], - aggregates: &'a [(String, AggregateType)], + aggregates: &[(String, AggregateType)], window: i64, - ) -> BTreeMap, Vec<(&'a String, column::Aggregate)>> { + ) -> BTreeMap, Vec<(&String, column::Aggregate)>> { // Build a hash table - essentially, scan columns for matching row ids, // emitting the encoded value for each column and track those value // combinations in a hashmap with running aggregates. @@ -325,7 +325,7 @@ impl Segment { // aggregating on. let mut aggregate_column_decoded_values = Vec::with_capacity(aggregates.len()); for (column_name, _) in aggregates { - let column_name: &'a String = column_name; + // let column_name: &String = column_name; if let Some(column) = self.column(&column_name) { let decoded_values = column.values(&filtered_row_ids_vec); @@ -349,7 +349,7 @@ impl Segment { // are grouping on. For columns that have no matching rows from the // filtering stage we will just emit None. let mut group_itrs = group_column_encoded_values - .iter() + .into_iter() .map(|vector| match vector { column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column @@ -361,19 +361,15 @@ impl Segment { // are aggregating on. For columns that have no matching rows from the // filtering stage we will just emit None. 
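For the multi-segment case, per-segment maps like the `BTreeMap<String, Values>` built by `read_rows_eq` above can be merged by extending each column's values in turn; a simplified sketch using plain vectors in place of the `Values` enum:

```rust
use std::collections::BTreeMap;

// Merge one segment's materialised columns into the overall result map,
// appending each column's values (simplified to the nullable-float case).
fn merge_segment(
    results: &mut BTreeMap<String, Vec<Option<f64>>>,
    segment_rows: BTreeMap<String, Vec<Option<f64>>>,
) {
    for (column_name, values) in segment_rows {
        results.entry(column_name).or_default().extend(values);
    }
}
```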
let mut aggregate_itrs = aggregate_column_decoded_values - .iter() + .into_iter() .map(|(col_name, values)| match values { - Some(values) => ( - col_name.as_str(), - Some(column::NullVectorIterator::new(values)), - ), + Some(values) => (col_name.as_str(), Some(column::VectorIterator::new(values))), None => (col_name.as_str(), None), }) .collect::>(); // hashMap is about 20% faster than BTreeMap in this case - let mut hash_table: BTreeMap, Vec<(&'a String, column::Aggregate)>> = - BTreeMap::new(); + let mut hash_table: BTreeMap, Vec<(&String, column::Aggregate)>> = BTreeMap::new(); let mut aggregate_row: Vec<(&str, Option)> = std::iter::repeat_with(|| ("", None)) @@ -388,16 +384,17 @@ impl Segment { group_itrs.iter_mut().enumerate().for_each(|(i, itr)| { if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + // + // TODO(edd): this is assuming non-null timestamps + if let Some(Some(column::Scalar::Integer(v))) = itr.next() { group_key[i] = v / window * window; } else { unreachable!( "something broken with grouping! Either processed None or wrong type" ); } - } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = - itr.next() - { + // The double Some is ok because encoded values are always non-null + } else if let Some(Some(column::Scalar::Unsigned32(v))) = itr.next() { group_key[i] = v as i64 } else { unreachable!( @@ -421,7 +418,7 @@ impl Segment { // This is cheaper than allocating a key and using the entry API if !hash_table.contains_key(&group_key) { - let mut agg_results: Vec<(&'a String, column::Aggregate)> = + let mut agg_results: Vec<(&String, column::Aggregate)> = Vec::with_capacity(aggregates.len()); for (col_name, agg_type) in aggregates { agg_results.push(( @@ -467,7 +464,8 @@ impl Segment { } log::debug!("({:?} rows processed) {:?}", processed_rows, hash_table); - hash_table + BTreeMap::new() + // hash_table } pub fn aggregate_by_group_using_sort( @@ -477,7 +475,7 @@ impl Segment { group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> Vec> { + ) -> Vec { log::debug!("aggregate_by_group_with_sort_unsorted called"); if window > 0 { @@ -573,8 +571,27 @@ impl Segment { let group_col_sort_order = &(0..group_columns.len()).collect::>(); super::sorter::sort(&mut all_columns, group_col_sort_order).unwrap(); - let group_itrs = all_columns - .iter() + let mut group_vecs = Vec::with_capacity(group_columns.len()); + let mut agg_vecs = Vec::with_capacity(aggregates.len()); + for (i, vec) in all_columns.into_iter().enumerate() { + if i < group_columns.len() { + group_vecs.push(vec); + } else { + agg_vecs.push(vec); + } + } + + let mut aggregate_cols = Vec::with_capacity(aggregates.len()); + for (sorted_vector, agg) in agg_vecs + .into_iter() + .skip(group_columns.len()) + .zip(aggregates.iter()) + { + aggregate_cols.push((agg.0.clone(), agg.1.clone(), sorted_vector)); + } + + let group_itrs = group_vecs + .into_iter() .take(group_columns.len()) .map(|vector| match vector { column::Vector::Unsigned32(_) => { @@ -585,15 +602,6 @@ impl Segment { }) .collect::>(); - let mut aggregate_cols = Vec::with_capacity(aggregates.len()); - for (sorted_vector, (col_name, agg_type)) in all_columns - .iter() - .skip(group_columns.len()) - .zip(aggregates.iter()) - { - aggregate_cols.push((col_name, agg_type, sorted_vector)); - } - Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) } @@ -603,14 +611,14 @@ impl 
Segment { // `aggregate_by_group_using_stream` assumes that all columns being grouped // on are part of the overall segment sort, therefore it does no sorting or // hashing, and just streams aggregates out in order. - pub fn aggregate_by_group_using_stream<'a>( + pub fn aggregate_by_group_using_stream( &self, time_range: (i64, i64), predicates: &[(&str, &str)], group_columns: &[String], aggregates: &[(String, AggregateType)], window: i64, - ) -> Vec> { + ) -> Vec { log::debug!("aggregate_by_group_using_stream called"); if window > 0 { @@ -662,7 +670,7 @@ impl Segment { } let group_itrs = group_column_encoded_values - .iter() + .into_iter() .map(|vector| match vector { column::Vector::Unsigned32(_) => column::VectorIterator::new(vector), // encoded tag columns column::Vector::Integer(_) => column::VectorIterator::new(vector), // encoded (but actually just raw) timestamp column @@ -672,7 +680,11 @@ impl Segment { let mut aggregate_cols = Vec::with_capacity(aggregates.len()); for (column_name, agg_type) in aggregates { - aggregate_cols.push((column_name, agg_type, self.column(&column_name).unwrap())); + aggregate_cols.push(( + column_name.clone(), + agg_type.clone(), + self.column(&column_name).unwrap(), + )); } Self::stream_grouped_aggregates(group_itrs, aggregate_cols, *total_rows as usize, window) @@ -681,12 +693,12 @@ impl Segment { // Once the rows necessary for doing a (windowed) grouped aggregate are // available and appropriately sorted this method will build a result set of // aggregates in a streaming way. - pub fn stream_grouped_aggregates<'a>( - mut group_itrs: Vec>, - aggregate_cols: Vec<(&String, &AggregateType, impl column::AggregatableByRange)>, + pub fn stream_grouped_aggregates( + mut group_itrs: Vec, + aggregate_cols: Vec<(String, AggregateType, impl column::AggregatableByRange)>, total_rows: usize, window: i64, - ) -> Vec> { + ) -> Vec { // this tracks the last seen group key row. When it changes we can emit // the grouped aggregates. let group_itrs_len = &group_itrs.len(); @@ -696,16 +708,17 @@ impl Segment { .map(|(i, itr)| { if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + // + // TODO(edd): this is assuming non-null time column + if let Some(Some(column::Scalar::Integer(v))) = itr.next() { v / window * window } else { unreachable!( "something broken with grouping! Either processed None or wrong type" ); } - } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = - itr.next() - { + // the double some should be ok as encoded values can never be None + } else if let Some(Some(column::Scalar::Unsigned32(v))) = itr.next() { v as i64 } else { unreachable!( @@ -734,16 +747,14 @@ impl Segment { { let next_v = if i == group_itrs_len - 1 && window > 0 { // time column - apply window function - if let Some(column::Value::Scalar(column::Scalar::Integer(v))) = itr.next() { + if let Some(Some(column::Scalar::Integer(v))) = itr.next() { v / window * window } else { unreachable!( "something broken with grouping! 
Either processed None or wrong type" ); } - } else if let Some(column::Value::Scalar(column::Scalar::Unsigned32(v))) = - itr.next() - { + } else if let Some(Some(column::Scalar::Unsigned32(v))) = itr.next() { v as i64 } else { unreachable!( @@ -767,7 +778,7 @@ impl Segment { group_key_start_row_id + group_size, ); - group_key_aggregates.push((*name, agg_result)); + group_key_aggregates.push((name.clone(), agg_result)); } results.push(GroupedAggregates { @@ -797,7 +808,7 @@ impl Segment { ); // TODO(edd): fix weirdness - group_key_aggregates.push((*name, agg_result)); + group_key_aggregates.push((name.clone(), agg_result)); } results.push(GroupedAggregates { @@ -806,8 +817,8 @@ impl Segment { }); log::debug!("({:?} rows processed) {:?}", processed_rows, results); - // results - vec![] + // vec![] + results } pub fn sum_column(&self, name: &str, row_ids: &mut croaring::Bitmap) -> Option { @@ -1230,13 +1241,13 @@ impl<'a> Segments<'a> { time_range: (i64, i64), predicates: &[(&str, &str)], select_columns: Vec, - ) -> BTreeMap> { + ) -> BTreeMap> { let (min, max) = time_range; if max <= min { panic!("max <= min"); } - let mut columns: BTreeMap> = BTreeMap::new(); + let mut columns: BTreeMap> = BTreeMap::new(); for segment in &self.segments { if !segment.meta.overlaps_time_range(min, max) { continue; // segment doesn't have time range @@ -1728,9 +1739,9 @@ pub enum GroupingStrategy { } #[derive(Debug)] -pub struct GroupedAggregates<'a> { +pub struct GroupedAggregates { pub group_key: Vec, - pub aggregates: Vec<(&'a String, column::Aggregate)>, + pub aggregates: Vec<(String, column::Aggregate)>, } #[cfg(test)] diff --git a/delorean_mem_qe/src/sorter.rs b/delorean_mem_qe/src/sorter.rs index 7dd7d6ced3..0e0a607c4d 100644 --- a/delorean_mem_qe/src/sorter.rs +++ b/delorean_mem_qe/src/sorter.rs @@ -43,7 +43,7 @@ const SORTED_CHECK_SIZE: usize = 1000; /// /// All chosen columns will be sorted in ascending order; the sort is *not* /// stable. 
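One more note before the sorter: the `v / window * window` expression used throughout the grouping code above truncates a timestamp to the start of its window. A standalone sketch, with one caveat:

```rust
// Truncate a timestamp to the start of its window. Caveat: Rust's
// integer division truncates toward zero, so pre-epoch (negative)
// timestamps would need floor division instead.
fn window_start(ts: i64, window: i64) -> i64 {
    ts / window * window
}

#[test]
fn window_start_buckets() {
    assert_eq!(window_start(119, 60), 60);
    assert_eq!(window_start(1_000_000_129, 60), 1_000_000_080);
}
```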
-pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(), Error> {
+pub fn sort(vectors: &mut [column::Vector], sort_by: &[usize]) -> Result<(), Error> {
     if vectors.is_empty() || sort_by.is_empty() {
         return Ok(());
     }
@@ -84,7 +84,7 @@ pub fn sort(vectors: &mut [column::Vector<'_>], sort_by: &[usize]) -> Result<(),
     Ok(())
 }
 
-fn quicksort_by(vectors: &mut [column::Vector<'_>], range: Range<usize>, sort_by: &[usize]) {
+fn quicksort_by(vectors: &mut [column::Vector], range: Range<usize>, sort_by: &[usize]) {
     if range.start >= range.end {
         return;
     }
@@ -94,7 +94,7 @@ fn quicksort_by(vectors: &mut [column::Vector<'_>], range: Range<usize>, sort_by
     quicksort_by(vectors, pivot + 1..range.end, sort_by);
 }
 
-fn partition(vectors: &mut [column::Vector<'_>], range: &Range<usize>, sort_by: &[usize]) -> usize {
+fn partition(vectors: &mut [column::Vector], range: &Range<usize>, sort_by: &[usize]) -> usize {
     let pivot = (range.start + range.end) / 2;
     let (lo, hi) = (range.start, range.end);
     if cmp(vectors, pivot as usize, lo as usize, sort_by) == Ordering::Less {
@@ -130,7 +130,7 @@ fn partition(vectors: &mut [column::Vector<'_>], range: &Range<usize>, sort_by:
     }
 }
 
-fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
+fn cmp(vectors: &[column::Vector], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
     for &idx in sort_by {
         match &vectors[idx] {
             column::Vector::Unsigned32(p) => {
@@ -154,7 +154,7 @@ fn cmp(vectors: &[column::Vector<'_>], a: usize, b: usize, sort_by: &[usize]) ->
 }
 
 #[allow(dead_code)]
-fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usize]) -> bool {
+fn vectors_sorted_asc(vectors: &[column::Vector], len: usize, sort_by: &[usize]) -> bool {
     'row_wise: for i in 1..len {
         for &idx in sort_by {
             match &vectors[idx] {
@@ -199,7 +199,7 @@ fn vectors_sorted_asc(vectors: &[column::Vector<'_>], len: usize, sort_by: &[usi
 }
 
 // Swap the same pair of elements in each packer column
-fn swap(vectors: &mut [column::Vector<'_>], a: usize, b: usize) {
+fn swap(vectors: &mut [column::Vector], a: usize, b: usize) {
     for p in vectors {
         p.swap(a, b);
     }
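Stripped of the `column::Vector` enum dispatch, the `cmp` routine above is a plain lexicographic row comparison across parallel column vectors, visiting columns in sort-key order and falling through on ties; a simplified `i64`-only sketch:

```rust
use std::cmp::Ordering;

// Compare rows `a` and `b` held across parallel column vectors,
// column by column in sort-key order.
fn cmp_rows(columns: &[Vec<i64>], a: usize, b: usize, sort_by: &[usize]) -> Ordering {
    for &idx in sort_by {
        match columns[idx][a].cmp(&columns[idx][b]) {
            Ordering::Equal => continue, // tie: fall through to next key
            other => return other,
        }
    }
    Ordering::Equal
}
```

This is what lets `sort` order whole rows while the data stays columnar: `swap` then applies the same index exchange to every column so rows remain aligned.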