From 15aac65c2cc39f46123ebff91e50fbeda9c414c0 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 8 Jul 2021 16:49:07 -0400 Subject: [PATCH 01/17] fix: Arrange use statements so rustfmt can manage their order --- data_types/src/partition_metadata.rs | 11 ++++++----- query/src/test.rs | 27 ++++++++++++--------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/data_types/src/partition_metadata.rs b/data_types/src/partition_metadata.rs index 90f8021b48..9dbfa05c99 100644 --- a/data_types/src/partition_metadata.rs +++ b/data_types/src/partition_metadata.rs @@ -1,12 +1,13 @@ //! This module contains structs that describe the metadata for a partition //! including schema, summary statistics, and file locations in storage. -use std::{borrow::Cow, mem}; - use serde::{Deserialize, Serialize}; -use std::borrow::Borrow; -use std::iter::FromIterator; -use std::num::NonZeroU64; +use std::{ + borrow::{Borrow, Cow}, + iter::FromIterator, + mem, + num::NonZeroU64, +}; /// Describes the aggregated (across all chunks) summary /// statistics for each column in a partition diff --git a/query/src/test.rs b/query/src/test.rs index 218d083ee6..d782edec1d 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -3,32 +3,29 @@ //! //! AKA it is a Mock -use arrow::{ - array::{ArrayRef, DictionaryArray, Int64Array, StringArray, TimestampNanosecondArray}, - datatypes::{DataType, Int32Type, TimeUnit}, - record_batch::RecordBatch, -}; -use data_types::{ - chunk_metadata::ChunkSummary, - partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary}, -}; -use datafusion::physical_plan::{common::SizedRecordBatchStream, SendableRecordBatchStream}; -use futures::StreamExt; - use crate::exec::Executor; use crate::{ exec::stringset::{StringSet, StringSetRef}, DatabaseStore, Predicate, PredicateMatch, QueryChunk, QueryChunkMeta, QueryDatabase, }; - +use arrow::{ + array::{ArrayRef, DictionaryArray, Int64Array, StringArray, TimestampNanosecondArray}, + datatypes::{DataType, Int32Type, TimeUnit}, + record_batch::RecordBatch, +}; +use async_trait::async_trait; +use data_types::{ + chunk_metadata::ChunkSummary, + partition_metadata::{ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary}, +}; +use datafusion::physical_plan::{common::SizedRecordBatchStream, SendableRecordBatchStream}; +use futures::StreamExt; use internal_types::{ schema::{ builder::SchemaBuilder, merge::SchemaMerger, InfluxColumnType, Schema, TIME_COLUMN_NAME, }, selection::Selection, }; - -use async_trait::async_trait; use parking_lot::Mutex; use snafu::Snafu; use std::{collections::BTreeMap, sync::Arc}; From d2aadddeef691ff1a6dcd6f2300571ee7221f7ec Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Thu, 8 Jul 2021 16:51:01 -0400 Subject: [PATCH 02/17] refactor: Remove unneeded reassignments --- data_types/src/partition_metadata.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/data_types/src/partition_metadata.rs b/data_types/src/partition_metadata.rs index 9dbfa05c99..62f8ed2979 100644 --- a/data_types/src/partition_metadata.rs +++ b/data_types/src/partition_metadata.rs @@ -45,11 +45,7 @@ pub struct PartitionChunkSummary { impl FromIterator for TableSummary { fn from_iter>(iter: T) -> Self { let mut iter = iter.into_iter(); - let first = iter.next().expect("must contain at least one element"); - let mut s = Self { - name: first.name, - columns: first.columns, - }; + let mut s = iter.next().expect("must contain at least one element"); for other in iter { s.update_from(&other) From 78f1c4fc803dfc52b4f6fc5a146b751295269d4e Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 14:26:25 -0400 Subject: [PATCH 03/17] test: Chunks can only have one table; no need to specify repeatedly This lets us make the name required and always present on TestChunks, and make the ID optional. --- query/src/frontend/reorg.rs | 22 +- query/src/provider.rs | 277 +++++++++++++---------- query/src/test.rs | 96 +++----- src/influxdb_ioxd/rpc/storage/service.rs | 144 ++++++------ 4 files changed, 258 insertions(+), 281 deletions(-) diff --git a/query/src/frontend/reorg.rs b/query/src/frontend/reorg.rs index 366e2fe852..f55b251dda 100644 --- a/query/src/frontend/reorg.rs +++ b/query/src/frontend/reorg.rs @@ -270,21 +270,21 @@ mod test { async fn get_test_chunks() -> (Arc, Vec>) { // Chunk 1 with 5 rows of data on 2 tags let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Chunk 2 has an extra field, and only 4 fields let chunk2 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_int_field_column("t", "field_int2") - .with_four_rows_of_data("t"), + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_int_field_column("field_int2") + .with_four_rows_of_data(), ); let expected = vec![ diff --git a/query/src/provider.rs b/query/src/provider.rs index a7dbeeac12..69a09e4391 100644 --- a/query/src/provider.rs +++ b/query/src/provider.rs @@ -765,18 +765,31 @@ mod test { // in the duplicate module // c1: no overlaps - let c1 = Arc::new(TestChunk::new(1).with_tag_column_with_stats("t", "tag1", "a", "b")); + let c1 = Arc::new( + TestChunk::new("t") + .with_id(1) + .with_tag_column_with_stats("tag1", "a", "b"), + ); // c2: over lap with c3 - let c2 = Arc::new(TestChunk::new(2).with_tag_column_with_stats("t", "tag1", "c", "d")); + let c2 = Arc::new( + TestChunk::new("t") + .with_id(2) + .with_tag_column_with_stats("tag1", "c", "d"), + ); // c3: overlap with c2 - let c3 = Arc::new(TestChunk::new(3).with_tag_column_with_stats("t", "tag1", "c", "d")); + let c3 = Arc::new( + TestChunk::new("t") + .with_id(3) + .with_tag_column_with_stats("tag1", "c", "d"), + ); // c4: self overlap let c4 = Arc::new( - TestChunk::new(4) - .with_tag_column_with_stats("t", "tag1", "e", "f") + TestChunk::new("t") + .with_id(4) + .with_tag_column_with_stats("tag1", "e", "f") .with_may_contain_pk_duplicates(true), ); @@ -797,11 +810,11 @@ mod test { async fn sort_planning_one_tag_with_time() { // Chunk 1 with 5 rows of data let chunk = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_time_column() + .with_tag_column("tag1") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Datafusion schema of the chunk @@ -851,12 +864,12 @@ mod test { async fn sort_planning_two_tags_with_time() { // Chunk 1 with 5 rows of data let chunk = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Datafusion schema of the chunk @@ -906,12 +919,12 @@ mod test { async fn sort_read_filter_plan_for_two_tags_with_time() { // Chunk 1 with 5 rows of data let chunk = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Datafusion schema of the chunk @@ -943,22 +956,24 @@ mod test { async fn deduplicate_plan_for_overlapped_chunks() { // Chunk 1 with 5 rows of data on 2 tags let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(1) + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Chunk 2 exactly the same with Chunk 1 let chunk2 = Arc::new( - TestChunk::new(2) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(2) + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Datafusion schema of the chunk // the same for 2 chunks @@ -1011,22 +1026,24 @@ mod test { // Same two chunks but only select the field and timestamp, not the tag values // Chunk 1 with 5 rows of data on 2 tags let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(1) + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Chunk 2 exactly the same with Chunk 1 let chunk2 = Arc::new( - TestChunk::new(2) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(2) + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); let chunks = vec![chunk1, chunk2]; @@ -1083,30 +1100,33 @@ mod test { // Chunks with different fields / tags, and select a subset // Chunk 1 with 5 rows of data on 2 tags let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(1) + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Chunk 2 same tags, but different fields let chunk2 = Arc::new( - TestChunk::new(2) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_int_field_column("t", "other_field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(2) + .with_time_column() + .with_tag_column("tag1") + .with_int_field_column("other_field_int") + .with_five_rows_of_data(), ); // Chunk 3 exactly the same with Chunk 2 let chunk3 = Arc::new( - TestChunk::new(3) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_int_field_column("t", "other_field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(3) + .with_time_column() + .with_tag_column("tag1") + .with_int_field_column("other_field_int") + .with_five_rows_of_data(), ); let chunks = vec![chunk1, chunk2, chunk3]; @@ -1172,32 +1192,35 @@ mod test { async fn deduplicate_plan_for_overlapped_chunks_with_different_schemas() { // Chunk 1 with 5 rows of data on 2 tags let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column("t") - .with_tag_column("t", "tag1") - .with_tag_column("t", "tag2") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(1) + .with_time_column() + .with_tag_column("tag1") + .with_tag_column("tag2") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Chunk 2 has two different tags let chunk2 = Arc::new( - TestChunk::new(2) - .with_time_column("t") - .with_tag_column("t", "tag3") - .with_tag_column("t", "tag1") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(2) + .with_time_column() + .with_tag_column("tag3") + .with_tag_column("tag1") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Chunk 3 has just tag3 let chunk3 = Arc::new( - TestChunk::new(3) - .with_time_column("t") - .with_tag_column("t", "tag3") - .with_int_field_column("t", "field_int") - .with_int_field_column("t", "field_int2") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(3) + .with_time_column() + .with_tag_column("tag3") + .with_int_field_column("field_int") + .with_int_field_column("field_int2") + .with_five_rows_of_data(), ); // Requested output schema == the schema for all three @@ -1271,11 +1294,11 @@ mod test { async fn scan_plan_with_one_chunk_no_duplicates() { // Test no duplicate at all let chunk = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Datafusion schema of the chunk @@ -1319,12 +1342,12 @@ mod test { async fn scan_plan_with_one_chunk_with_duplicates() { // Test one chunk with duplicate within let chunk = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") .with_may_contain_pk_duplicates(true) - .with_ten_rows_of_data_some_duplicates("t"), + .with_ten_rows_of_data_some_duplicates(), ); // Datafusion schema of the chunk @@ -1375,12 +1398,12 @@ mod test { async fn scan_plan_with_one_chunk_with_duplicates_subset() { // Test one chunk with duplicate within let chunk = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") .with_may_contain_pk_duplicates(true) - .with_ten_rows_of_data_some_duplicates("t"), + .with_ten_rows_of_data_some_duplicates(), ); let chunks = vec![chunk]; @@ -1440,19 +1463,19 @@ mod test { async fn scan_plan_with_two_overlapped_chunks_with_duplicates() { // test overlapped chunks let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_ten_rows_of_data_some_duplicates("t"), + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_ten_rows_of_data_some_duplicates(), ); let chunk2 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // Datafusion schema of the chunk @@ -1509,39 +1532,43 @@ mod test { async fn scan_plan_with_four_chunks() { // This test covers all kind of chunks: overlap, non-overlap without duplicates within, non-overlap with duplicates within let chunk1 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_ten_rows_of_data_some_duplicates("t"), + TestChunk::new("t") + .with_id(1) + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_ten_rows_of_data_some_duplicates(), ); // chunk2 overlaps with chunk 1 let chunk2 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 5, 7000) - .with_tag_column_with_stats("t", "tag1", "AL", "MT") - .with_int_field_column("t", "field_int") - .with_five_rows_of_data("t"), + TestChunk::new("t") + .with_id(2) + .with_time_column_with_stats(5, 7000) + .with_tag_column_with_stats("tag1", "AL", "MT") + .with_int_field_column("field_int") + .with_five_rows_of_data(), ); // chunk3 no overlap, no duplicates within let chunk3 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 8000, 20000) - .with_tag_column_with_stats("t", "tag1", "UT", "WA") - .with_int_field_column("t", "field_int") - .with_three_rows_of_data("t"), + TestChunk::new("t") + .with_id(3) + .with_time_column_with_stats(8000, 20000) + .with_tag_column_with_stats("tag1", "UT", "WA") + .with_int_field_column("field_int") + .with_three_rows_of_data(), ); // chunk3 no overlap, duplicates within let chunk4 = Arc::new( - TestChunk::new(1) - .with_time_column_with_stats("t", 28000, 220000) - .with_tag_column_with_stats("t", "tag1", "UT", "WA") - .with_int_field_column("t", "field_int") + TestChunk::new("t") + .with_id(4) + .with_time_column_with_stats(28000, 220000) + .with_tag_column_with_stats("tag1", "UT", "WA") + .with_int_field_column("field_int") .with_may_contain_pk_duplicates(true) - .with_four_rows_of_data("t"), + .with_four_rows_of_data(), ); // Datafusion schema of the chunk diff --git a/query/src/test.rs b/query/src/test.rs index d782edec1d..04e132655f 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -143,7 +143,7 @@ pub struct TestChunk { predicates: Mutex>, /// Table name - table_name: Option, + table_name: String, /// Schema of the table table_schema: Option, @@ -162,13 +162,18 @@ pub struct TestChunk { } impl TestChunk { - pub fn new(id: u32) -> Self { + pub fn new(table_name: impl Into) -> Self { Self { - id, + table_name: table_name.into(), ..Default::default() } } + pub fn with_id(mut self, id: u32) -> Self { + self.id = id; + self + } + /// specify that any call should result in an error with the message /// specified pub fn with_error(mut self, error_message: impl Into) -> Self { @@ -191,11 +196,6 @@ impl TestChunk { } } - /// Register a table with the test chunk and a "dummy" column - pub fn with_table(self, table_name: impl Into) -> Self { - self.with_tag_column(table_name, "dummy_col") - } - /// Set the `may_contain_pk_duplicates` flag pub fn with_may_contain_pk_duplicates(mut self, v: bool) -> Self { self.may_contain_pk_duplicates = v; @@ -203,33 +203,26 @@ impl TestChunk { } /// Register an tag column with the test chunk - pub fn with_tag_column( - self, - table_name: impl Into, - column_name: impl Into, - ) -> Self { - let table_name = table_name.into(); + pub fn with_tag_column(self, column_name: impl Into) -> Self { let column_name = column_name.into(); // make a new schema with the specified column and // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().tag(&column_name).build().unwrap(); - self.add_schema_to_table(table_name, new_column_schema) + self.add_schema_to_table(new_column_schema) } /// Register an tag column with the test chunk pub fn with_tag_column_with_stats( self, - table_name: impl Into, column_name: impl Into, min: &str, max: &str, ) -> Self { - let table_name = table_name.into(); let column_name = column_name.into(); - let mut new_self = self.with_tag_column(&table_name, &column_name); + let mut new_self = self.with_tag_column(&column_name); // Now, find the appropriate column summary and update the stats let column_summary: &mut ColumnSummary = new_self @@ -251,26 +244,17 @@ impl TestChunk { } /// Register a timestamp column with the test chunk - pub fn with_time_column(self, table_name: impl Into) -> Self { - let table_name = table_name.into(); - + pub fn with_time_column(self) -> Self { // make a new schema with the specified column and // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().timestamp().build().unwrap(); - self.add_schema_to_table(table_name, new_column_schema) + self.add_schema_to_table(new_column_schema) } /// Register a timestamp column with the test chunk - pub fn with_time_column_with_stats( - self, - table_name: impl Into, - min: i64, - max: i64, - ) -> Self { - let table_name = table_name.into(); - - let mut new_self = self.with_time_column(&table_name); + pub fn with_time_column_with_stats(self, min: i64, max: i64) -> Self { + let mut new_self = self.with_time_column(); // Now, find the appropriate column summary and update the stats let column_summary: &mut ColumnSummary = new_self @@ -292,11 +276,7 @@ impl TestChunk { } /// Register an int field column with the test chunk - pub fn with_int_field_column( - self, - table_name: impl Into, - column_name: impl Into, - ) -> Self { + pub fn with_int_field_column(self, column_name: impl Into) -> Self { let column_name = column_name.into(); // make a new schema with the specified column and @@ -305,20 +285,10 @@ impl TestChunk { .field(&column_name, DataType::Int64) .build() .unwrap(); - self.add_schema_to_table(table_name, new_column_schema) + self.add_schema_to_table(new_column_schema) } - fn add_schema_to_table( - mut self, - table_name: impl Into, - new_column_schema: Schema, - ) -> Self { - let table_name = table_name.into(); - if let Some(existing_name) = &self.table_name { - assert_eq!(&table_name, existing_name); - } - self.table_name = Some(table_name.clone()); - + fn add_schema_to_table(mut self, new_column_schema: Schema) -> Self { // assume the new schema has exactly a single table assert_eq!(new_column_schema.len(), 1); let (col_type, new_field) = new_column_schema.field(0); @@ -365,7 +335,7 @@ impl TestChunk { let mut table_summary = self .table_summary .take() - .unwrap_or_else(|| TableSummary::new(table_name)); + .unwrap_or_else(|| TableSummary::new(&self.table_name)); table_summary.columns.push(column_summary); self.table_summary = Some(table_summary); @@ -379,7 +349,7 @@ impl TestChunk { /// Prepares this chunk to return a specific record batch with one /// row of non null data. - pub fn with_one_row_of_null_data(mut self, _table_name: impl Into) -> Self { + pub fn with_one_row_of_null_data(mut self) -> Self { //let table_name = table_name.into(); let schema = self .table_schema @@ -425,7 +395,7 @@ impl TestChunk { /// "| UT | RI | 70 | 1970-01-01 00:00:00.000020 |", /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(UT, WA), tag2(RI, SC), time(8000, 20000) - pub fn with_three_rows_of_data(mut self, _table_name: impl Into) -> Self { + pub fn with_three_rows_of_data(mut self) -> Self { let schema = self .table_schema .as_ref() @@ -489,7 +459,7 @@ impl TestChunk { /// "| VT | NC | 50 | 1970-01-01 00:00:00.000210 |", // duplicate of (1) /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(UT, WA), tag2(RI, SC), time(28000, 220000) - pub fn with_four_rows_of_data(mut self, _table_name: impl Into) -> Self { + pub fn with_four_rows_of_data(mut self) -> Self { let schema = self .table_schema .as_ref() @@ -554,7 +524,7 @@ impl TestChunk { /// "| MT | AL | 5 | 1970-01-01 00:00:00.000005 |", /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(AL, MT), tag2(AL, MA), time(5, 7000) - pub fn with_five_rows_of_data(mut self, _table_name: impl Into) -> Self { + pub fn with_five_rows_of_data(mut self) -> Self { let schema = self .table_schema .as_ref() @@ -631,7 +601,7 @@ impl TestChunk { /// "| MT | AL | 30 | 1970-01-01 00:00:00.000005 |", // Duplicate with (3) /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(AL, MT), tag2(AL, MA), time(5, 7000) - pub fn with_ten_rows_of_data_some_duplicates(mut self, _table_name: impl Into) -> Self { + pub fn with_ten_rows_of_data_some_duplicates(mut self) -> Self { //let table_name = table_name.into(); let schema = self .table_schema @@ -730,7 +700,7 @@ impl QueryChunk for TestChunk { } fn table_name(&self) -> &str { - self.table_name.as_deref().unwrap() + &self.table_name } fn may_contain_pk_duplicates(&self) -> bool { @@ -769,17 +739,11 @@ impl QueryChunk for TestChunk { } // otherwise fall back to basic filtering based on table name predicate. - let predicate_match = self - .table_name - .as_ref() - .map(|table_name| { - if !predicate.should_include_table(&table_name) { - PredicateMatch::Zero - } else { - PredicateMatch::Unknown - } - }) - .unwrap_or(PredicateMatch::Unknown); + let predicate_match = if !predicate.should_include_table(&self.table_name) { + PredicateMatch::Zero + } else { + PredicateMatch::Unknown + }; Ok(predicate_match) } diff --git a/src/influxdb_ioxd/rpc/storage/service.rs b/src/influxdb_ioxd/rpc/storage/service.rs index bb08c26bcd..3b87ea47bd 100644 --- a/src/influxdb_ioxd/rpc/storage/service.rs +++ b/src/influxdb_ioxd/rpc/storage/service.rs @@ -1252,13 +1252,13 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk0 = TestChunk::new(0) - .with_predicate_match(PredicateMatch::AtLeastOne) - .with_table("h2o"); + let chunk0 = TestChunk::new("h2o") + .with_id(0) + .with_predicate_match(PredicateMatch::AtLeastOne); - let chunk1 = TestChunk::new(1) - .with_predicate_match(PredicateMatch::AtLeastOne) - .with_table("o2"); + let chunk1 = TestChunk::new("o2") + .with_id(1) + .with_predicate_match(PredicateMatch::AtLeastOne); fixture .test_storage @@ -1346,13 +1346,15 @@ mod tests { let partition_id = 1; // Note multiple tables / measureemnts: - let chunk0 = TestChunk::new(0) - .with_tag_column("m1", "k1") - .with_tag_column("m1", "k2"); + let chunk0 = TestChunk::new("m1") + .with_id(0) + .with_tag_column("k1") + .with_tag_column("k2"); - let chunk1 = TestChunk::new(1) - .with_tag_column("m2", "k3") - .with_tag_column("m2", "k4"); + let chunk1 = TestChunk::new("m2") + .with_id(1) + .with_tag_column("k3") + .with_tag_column("k4"); fixture .test_storage @@ -1414,9 +1416,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_table("my_table") - .with_error("Sugar we are going down"); + let chunk = TestChunk::new("my_table").with_error("Sugar we are going down"); fixture .test_storage @@ -1458,15 +1458,15 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk0 = TestChunk::new(0) + let chunk0 = TestChunk::new("m1") // predicate specifies m4, so this is filtered out - .with_tag_column("m1", "k0"); + .with_tag_column("k0"); - let chunk1 = TestChunk::new(1) - .with_tag_column("m4", "k1") - .with_tag_column("m4", "k2") - .with_tag_column("m4", "k3") - .with_tag_column("m4", "k4"); + let chunk1 = TestChunk::new("m4") + .with_tag_column("k1") + .with_tag_column("k2") + .with_tag_column("k3") + .with_tag_column("k4"); fixture .test_storage @@ -1540,10 +1540,8 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - // predicate specifies m4, so this is filtered out - .with_table("my_table") - .with_error("This is an error"); + // predicate specifies m4, so this is filtered out + let chunk = TestChunk::new("my_table").with_error("This is an error"); fixture .test_storage @@ -1587,10 +1585,10 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -1648,9 +1646,7 @@ mod tests { tag_key: [0].into(), }; - let chunk = TestChunk::new(0) - .with_predicate_match(PredicateMatch::AtLeastOne) - .with_table("h2o"); + let chunk = TestChunk::new("h2o").with_predicate_match(PredicateMatch::AtLeastOne); fixture .test_storage @@ -1679,11 +1675,11 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_int_field_column("TheMeasurement", "Field1") - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_int_field_column("Field1") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -1727,9 +1723,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_table("my_table") - .with_error("Sugar we are going down"); + let chunk = TestChunk::new("my_table").with_error("Sugar we are going down"); fixture .test_storage @@ -1798,10 +1792,10 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -1847,9 +1841,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_table("my_table") - .with_error("Sugar we are going down"); + let chunk = TestChunk::new("my_table").with_error("Sugar we are going down"); fixture .test_storage @@ -1953,10 +1945,10 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -1999,9 +1991,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_table("my_table") - .with_error("Sugar we are going down"); + let chunk = TestChunk::new("my_table").with_error("Sugar we are going down"); fixture .test_storage @@ -2041,10 +2031,10 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -2093,9 +2083,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_table("my_table") - .with_error("Sugar we are going down"); + let chunk = TestChunk::new("my_table").with_error("Sugar we are going down"); fixture .test_storage @@ -2177,10 +2165,10 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -2237,10 +2225,10 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -2305,9 +2293,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0) - .with_table("my_table") - .with_error("Sugar we are going down"); + let chunk = TestChunk::new("my_table").with_error("Sugar we are going down"); fixture .test_storage @@ -2363,11 +2349,11 @@ mod tests { let partition_id = 1; // Add a chunk with a field - let chunk = TestChunk::new(0) - .with_int_field_column("TheMeasurement", "Field1") - .with_time_column("TheMeasurement") - .with_tag_column("TheMeasurement", "state") - .with_one_row_of_null_data("TheMeasurement"); + let chunk = TestChunk::new("TheMeasurement") + .with_int_field_column("Field1") + .with_time_column() + .with_tag_column("state") + .with_one_row_of_null_data(); fixture .test_storage @@ -2413,7 +2399,7 @@ mod tests { let db_info = OrgAndBucket::new(123, 456); let partition_id = 1; - let chunk = TestChunk::new(0).with_error("Sugar we are going down"); + let chunk = TestChunk::new("t").with_error("Sugar we are going down"); fixture .test_storage From 92cb5986f1d0e49c26498f2afab51aeb4fb38398 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 14:34:31 -0400 Subject: [PATCH 04/17] test: Initialize a schema on TestChunk to always exist --- query/src/test.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/query/src/test.rs b/query/src/test.rs index 04e132655f..cfd4aba7f1 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -146,7 +146,7 @@ pub struct TestChunk { table_name: String, /// Schema of the table - table_schema: Option, + schema: Arc, /// RecordBatches that are returned on each request table_data: Vec>, @@ -165,6 +165,7 @@ impl TestChunk { pub fn new(table_name: impl Into) -> Self { Self { table_name: table_name.into(), + schema: Arc::new(SchemaBuilder::new().build().unwrap()), ..Default::default() } } From 22d4040c81c3f6567b054a2c93f25b6d50c0dd95 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 14:55:31 -0400 Subject: [PATCH 05/17] test: Always initialize a Schema for TestChunk --- query/src/test.rs | 129 ++++++++++++++++++---------------------------- 1 file changed, 51 insertions(+), 78 deletions(-) diff --git a/query/src/test.rs b/query/src/test.rs index cfd4aba7f1..dbd5aa1f0d 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -132,8 +132,14 @@ impl QueryDatabase for TestDatabase { } } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct TestChunk { + /// Table name + table_name: String, + + /// Schema of the table + schema: Arc, + id: u32, /// Set the flag if this chunk might contain duplicates @@ -142,12 +148,6 @@ pub struct TestChunk { /// A copy of the captured predicates passed predicates: Mutex>, - /// Table name - table_name: String, - - /// Schema of the table - schema: Arc, - /// RecordBatches that are returned on each request table_data: Vec>, @@ -166,7 +166,13 @@ impl TestChunk { Self { table_name: table_name.into(), schema: Arc::new(SchemaBuilder::new().build().unwrap()), - ..Default::default() + id: Default::default(), + may_contain_pk_duplicates: Default::default(), + predicates: Default::default(), + table_data: Default::default(), + saved_error: Default::default(), + predicate_match: Default::default(), + table_summary: Default::default(), } } @@ -323,15 +329,10 @@ impl TestChunk { let mut merger = SchemaMerger::new(); merger = merger.merge(&new_column_schema).unwrap(); - - if let Some(existing_schema) = self.table_schema.as_ref() { - merger = merger - .merge(existing_schema) - .expect("merging was successful"); - } - let new_schema = merger.build(); - - self.table_schema = Some(new_schema); + merger = merger + .merge(self.schema.as_ref()) + .expect("merging was successful"); + self.schema = Arc::new(merger.build()); let mut table_summary = self .table_summary @@ -351,14 +352,9 @@ impl TestChunk { /// Prepares this chunk to return a specific record batch with one /// row of non null data. pub fn with_one_row_of_null_data(mut self) -> Self { - //let table_name = table_name.into(); - let schema = self - .table_schema - .as_ref() - .expect("table must exist in TestChunk"); - // create arrays - let columns = schema + let columns = self + .schema .iter() .map(|(_influxdb_column_type, field)| match field.data_type() { DataType::Int64 => Arc::new(Int64Array::from(vec![1000])) as ArrayRef, @@ -379,7 +375,8 @@ impl TestChunk { }) .collect::>(); - let batch = RecordBatch::try_new(schema.into(), columns).expect("made record batch"); + let batch = + RecordBatch::try_new(self.schema.as_ref().into(), columns).expect("made record batch"); println!("TestChunk batch data: {:#?}", batch); self.table_data.push(Arc::new(batch)); @@ -397,13 +394,9 @@ impl TestChunk { /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(UT, WA), tag2(RI, SC), time(8000, 20000) pub fn with_three_rows_of_data(mut self) -> Self { - let schema = self - .table_schema - .as_ref() - .expect("table must exist in TestChunk"); - // create arrays - let columns = schema + let columns = self + .schema .iter() .map(|(_influxdb_column_type, field)| match field.data_type() { DataType::Int64 => Arc::new(Int64Array::from(vec![1000, 10, 70])) as ArrayRef, @@ -443,7 +436,8 @@ impl TestChunk { }) .collect::>(); - let batch = RecordBatch::try_new(schema.into(), columns).expect("made record batch"); + let batch = + RecordBatch::try_new(self.schema.as_ref().into(), columns).expect("made record batch"); self.table_data.push(Arc::new(batch)); self @@ -461,13 +455,9 @@ impl TestChunk { /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(UT, WA), tag2(RI, SC), time(28000, 220000) pub fn with_four_rows_of_data(mut self) -> Self { - let schema = self - .table_schema - .as_ref() - .expect("table must exist in TestChunk"); - // create arrays - let columns = schema + let columns = self + .schema .iter() .map(|(_influxdb_column_type, field)| match field.data_type() { DataType::Int64 => Arc::new(Int64Array::from(vec![1000, 10, 70, 50])) as ArrayRef, @@ -507,7 +497,8 @@ impl TestChunk { }) .collect::>(); - let batch = RecordBatch::try_new(schema.into(), columns).expect("made record batch"); + let batch = + RecordBatch::try_new(self.schema.as_ref().into(), columns).expect("made record batch"); self.table_data.push(Arc::new(batch)); self @@ -526,13 +517,9 @@ impl TestChunk { /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(AL, MT), tag2(AL, MA), time(5, 7000) pub fn with_five_rows_of_data(mut self) -> Self { - let schema = self - .table_schema - .as_ref() - .expect("table must exist in TestChunk"); - // create arrays - let columns = schema + let columns = self + .schema .iter() .map(|(_influxdb_column_type, field)| match field.data_type() { DataType::Int64 => { @@ -579,7 +566,8 @@ impl TestChunk { }) .collect::>(); - let batch = RecordBatch::try_new(schema.into(), columns).expect("made record batch"); + let batch = + RecordBatch::try_new(self.schema.as_ref().into(), columns).expect("made record batch"); self.table_data.push(Arc::new(batch)); self @@ -603,14 +591,9 @@ impl TestChunk { /// "+------+------+-----------+-------------------------------+", /// Stats(min, max) : tag1(AL, MT), tag2(AL, MA), time(5, 7000) pub fn with_ten_rows_of_data_some_duplicates(mut self) -> Self { - //let table_name = table_name.into(); - let schema = self - .table_schema - .as_ref() - .expect("table must exist in TestChunk"); - // create arrays - let columns = schema + let columns = self + .schema .iter() .map(|(_influxdb_column_type, field)| match field.data_type() { DataType::Int64 => Arc::new(Int64Array::from(vec![ @@ -661,35 +644,28 @@ impl TestChunk { }) .collect::>(); - let batch = RecordBatch::try_new(schema.into(), columns).expect("made record batch"); + let batch = + RecordBatch::try_new(self.schema.as_ref().into(), columns).expect("made record batch"); self.table_data.push(Arc::new(batch)); self } /// Returns all columns of the table - pub fn all_column_names(&self) -> Option { - let column_names = self.table_schema.as_ref().map(|schema| { - schema - .iter() - .map(|(_, field)| field.name().to_string()) - .collect::() - }); - - column_names + pub fn all_column_names(&self) -> StringSet { + self.schema + .iter() + .map(|(_, field)| field.name().to_string()) + .collect() } /// Returns just the specified columns - pub fn specific_column_names_selection(&self, columns: &[&str]) -> Option { - let column_names = self.table_schema.as_ref().map(|schema| { - schema - .iter() - .map(|(_, field)| field.name().to_string()) - .filter(|col| columns.contains(&col.as_str())) - .collect::() - }); - - column_names + pub fn specific_column_names_selection(&self, columns: &[&str]) -> StringSet { + self.schema + .iter() + .map(|(_, field)| field.name().to_string()) + .filter(|col| columns.contains(&col.as_str())) + .collect() } } @@ -774,7 +750,7 @@ impl QueryChunk for TestChunk { Selection::Some(cols) => self.specific_column_names_selection(cols), }; - Ok(column_names) + Ok(Some(column_names)) } } @@ -786,10 +762,7 @@ impl QueryChunkMeta for TestChunk { } fn schema(&self) -> Arc { - self.table_schema - .as_ref() - .map(|s| Arc::new(s.clone())) - .expect("schema was set") + Arc::clone(&self.schema) } } From 4406d8a21952ab944bdc609ea3a6c7dc9778a1f4 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 19:56:30 -0400 Subject: [PATCH 06/17] test: Always initialize a TableSummary on TestChunk --- query/src/test.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/query/src/test.rs b/query/src/test.rs index dbd5aa1f0d..2a77052fee 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -140,6 +140,9 @@ pub struct TestChunk { /// Schema of the table schema: Arc, + /// Return value for summary() + table_summary: TableSummary, + id: u32, /// Set the flag if this chunk might contain duplicates @@ -156,23 +159,21 @@ pub struct TestChunk { /// Return value for apply_predicate, if desired predicate_match: Option, - - /// Return value for summary(), if desired - table_summary: Option, } impl TestChunk { pub fn new(table_name: impl Into) -> Self { + let table_name = table_name.into(); Self { - table_name: table_name.into(), + table_name: table_name.clone(), schema: Arc::new(SchemaBuilder::new().build().unwrap()), + table_summary: TableSummary::new(table_name), id: Default::default(), may_contain_pk_duplicates: Default::default(), predicates: Default::default(), table_data: Default::default(), saved_error: Default::default(), predicate_match: Default::default(), - table_summary: Default::default(), } } @@ -234,8 +235,6 @@ impl TestChunk { // Now, find the appropriate column summary and update the stats let column_summary: &mut ColumnSummary = new_self .table_summary - .as_mut() - .expect("had table summary") .columns .iter_mut() .find(|c| c.name == column_name) @@ -266,8 +265,6 @@ impl TestChunk { // Now, find the appropriate column summary and update the stats let column_summary: &mut ColumnSummary = new_self .table_summary - .as_mut() - .expect("had table summary") .columns .iter_mut() .find(|c| c.name == TIME_COLUMN_NAME) @@ -334,12 +331,7 @@ impl TestChunk { .expect("merging was successful"); self.schema = Arc::new(merger.build()); - let mut table_summary = self - .table_summary - .take() - .unwrap_or_else(|| TableSummary::new(&self.table_name)); - table_summary.columns.push(column_summary); - self.table_summary = Some(table_summary); + self.table_summary.columns.push(column_summary); self } @@ -756,9 +748,7 @@ impl QueryChunk for TestChunk { impl QueryChunkMeta for TestChunk { fn summary(&self) -> &TableSummary { - self.table_summary - .as_ref() - .expect("Table summary not configured for TestChunk") + &self.table_summary } fn schema(&self) -> Arc { From e05ca7f98b3ea2ae190170a575958167409d4b17 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 20:28:21 -0400 Subject: [PATCH 07/17] fix: Change a method name that says null to not say null The comment and implementation seem to indicate this is creating non-null data. --- query/src/test.rs | 2 +- src/influxdb_ioxd/rpc/storage/service.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/query/src/test.rs b/query/src/test.rs index 2a77052fee..cb50241d25 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -343,7 +343,7 @@ impl TestChunk { /// Prepares this chunk to return a specific record batch with one /// row of non null data. - pub fn with_one_row_of_null_data(mut self) -> Self { + pub fn with_one_row_of_data(mut self) -> Self { // create arrays let columns = self .schema diff --git a/src/influxdb_ioxd/rpc/storage/service.rs b/src/influxdb_ioxd/rpc/storage/service.rs index 3b87ea47bd..53781fceed 100644 --- a/src/influxdb_ioxd/rpc/storage/service.rs +++ b/src/influxdb_ioxd/rpc/storage/service.rs @@ -1588,7 +1588,7 @@ mod tests { let chunk = TestChunk::new("TheMeasurement") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -1679,7 +1679,7 @@ mod tests { .with_int_field_column("Field1") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -1795,7 +1795,7 @@ mod tests { let chunk = TestChunk::new("TheMeasurement") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -1948,7 +1948,7 @@ mod tests { let chunk = TestChunk::new("TheMeasurement") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -2034,7 +2034,7 @@ mod tests { let chunk = TestChunk::new("TheMeasurement") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -2168,7 +2168,7 @@ mod tests { let chunk = TestChunk::new("TheMeasurement") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -2228,7 +2228,7 @@ mod tests { let chunk = TestChunk::new("TheMeasurement") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage @@ -2353,7 +2353,7 @@ mod tests { .with_int_field_column("Field1") .with_time_column() .with_tag_column("state") - .with_one_row_of_null_data(); + .with_one_row_of_data(); fixture .test_storage From 6cd75bc688e94847bbd19c90e2393ffffdca6ac9 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 21:13:42 -0400 Subject: [PATCH 08/17] test: Optionally take stats in add_schema_to_table This gets rid of a lookup and construction of default stats that aren't necessary --- query/src/test.rs | 50 ++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/query/src/test.rs b/query/src/test.rs index cb50241d25..8754ace59f 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -21,9 +21,7 @@ use data_types::{ use datafusion::physical_plan::{common::SizedRecordBatchStream, SendableRecordBatchStream}; use futures::StreamExt; use internal_types::{ - schema::{ - builder::SchemaBuilder, merge::SchemaMerger, InfluxColumnType, Schema, TIME_COLUMN_NAME, - }, + schema::{builder::SchemaBuilder, merge::SchemaMerger, InfluxColumnType, Schema}, selection::Selection, }; use parking_lot::Mutex; @@ -218,7 +216,7 @@ impl TestChunk { // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().tag(&column_name).build().unwrap(); - self.add_schema_to_table(new_column_schema) + self.add_schema_to_table(new_column_schema, None) } /// Register an tag column with the test chunk @@ -230,23 +228,18 @@ impl TestChunk { ) -> Self { let column_name = column_name.into(); - let mut new_self = self.with_tag_column(&column_name); + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new().tag(&column_name).build().unwrap(); - // Now, find the appropriate column summary and update the stats - let column_summary: &mut ColumnSummary = new_self - .table_summary - .columns - .iter_mut() - .find(|c| c.name == column_name) - .expect("had column"); - - column_summary.stats = Statistics::String(StatValues { + // Construct stats + let stats = Statistics::String(StatValues { min: Some(min.to_string()), max: Some(max.to_string()), ..Default::default() }); - new_self + self.add_schema_to_table(new_column_schema, Some(stats)) } /// Register a timestamp column with the test chunk @@ -255,28 +248,23 @@ impl TestChunk { // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().timestamp().build().unwrap(); - self.add_schema_to_table(new_column_schema) + self.add_schema_to_table(new_column_schema, None) } /// Register a timestamp column with the test chunk pub fn with_time_column_with_stats(self, min: i64, max: i64) -> Self { - let mut new_self = self.with_time_column(); + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new().timestamp().build().unwrap(); - // Now, find the appropriate column summary and update the stats - let column_summary: &mut ColumnSummary = new_self - .table_summary - .columns - .iter_mut() - .find(|c| c.name == TIME_COLUMN_NAME) - .expect("had column"); - - column_summary.stats = Statistics::I64(StatValues { + // Construct stats + let stats = Statistics::I64(StatValues { min: Some(min), max: Some(max), ..Default::default() }); - new_self + self.add_schema_to_table(new_column_schema, Some(stats)) } /// Register an int field column with the test chunk @@ -289,10 +277,10 @@ impl TestChunk { .field(&column_name, DataType::Int64) .build() .unwrap(); - self.add_schema_to_table(new_column_schema) + self.add_schema_to_table(new_column_schema, None) } - fn add_schema_to_table(mut self, new_column_schema: Schema) -> Self { + fn add_schema_to_table(mut self, new_column_schema: Schema, stats: Option) -> Self { // assume the new schema has exactly a single table assert_eq!(new_column_schema.len(), 1); let (col_type, new_field) = new_column_schema.field(0); @@ -304,7 +292,7 @@ impl TestChunk { InfluxColumnType::Timestamp => InfluxDbType::Timestamp, }); - let stats = match new_field.data_type() { + let stats = stats.unwrap_or_else(|| match new_field.data_type() { DataType::Boolean => Statistics::Bool(StatValues::default()), DataType::Int64 => Statistics::I64(StatValues::default()), DataType::UInt64 => Statistics::U64(StatValues::default()), @@ -316,7 +304,7 @@ impl TestChunk { DataType::Float64 => Statistics::String(StatValues::default()), DataType::Timestamp(_, _) => Statistics::I64(StatValues::default()), _ => panic!("Unsupported type in TestChunk: {:?}", new_field.data_type()), - }; + }); let column_summary = ColumnSummary { name: new_field.name().clone(), From b26aae1cb471dd4b71e2c16b5f3bd9f2e84adf65 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 9 Jul 2021 21:45:37 -0400 Subject: [PATCH 09/17] test: Add an arg to control whether to add a column summary at all Always true for now, but there are some cases in query/src/pruning.rs that don't add any column summaries that will use this with `false`. --- query/src/test.rs | 86 ++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/query/src/test.rs b/query/src/test.rs index 8754ace59f..60603d4ffa 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -208,7 +208,7 @@ impl TestChunk { self } - /// Register an tag column with the test chunk + /// Register a tag column with the test chunk with default stats pub fn with_tag_column(self, column_name: impl Into) -> Self { let column_name = column_name.into(); @@ -216,10 +216,10 @@ impl TestChunk { // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().tag(&column_name).build().unwrap(); - self.add_schema_to_table(new_column_schema, None) + self.add_schema_to_table(new_column_schema, true, None) } - /// Register an tag column with the test chunk + /// Register a tag column with the test chunk pub fn with_tag_column_with_stats( self, column_name: impl Into, @@ -239,16 +239,16 @@ impl TestChunk { ..Default::default() }); - self.add_schema_to_table(new_column_schema, Some(stats)) + self.add_schema_to_table(new_column_schema, true, Some(stats)) } - /// Register a timestamp column with the test chunk + /// Register a timestamp column with the test chunk with default stats pub fn with_time_column(self) -> Self { // make a new schema with the specified column and // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().timestamp().build().unwrap(); - self.add_schema_to_table(new_column_schema, None) + self.add_schema_to_table(new_column_schema, true, None) } /// Register a timestamp column with the test chunk @@ -264,10 +264,10 @@ impl TestChunk { ..Default::default() }); - self.add_schema_to_table(new_column_schema, Some(stats)) + self.add_schema_to_table(new_column_schema, true, Some(stats)) } - /// Register an int field column with the test chunk + /// Register an int field column with the test chunk with default stats pub fn with_int_field_column(self, column_name: impl Into) -> Self { let column_name = column_name.into(); @@ -277,41 +277,22 @@ impl TestChunk { .field(&column_name, DataType::Int64) .build() .unwrap(); - self.add_schema_to_table(new_column_schema, None) + self.add_schema_to_table(new_column_schema, true, None) } - fn add_schema_to_table(mut self, new_column_schema: Schema, stats: Option) -> Self { + /// Adds the specified schema and optionally a column summary containing optional stats. + /// If `add_column_summary` is false, `stats` is ignored. If `add_column_summary` is true but + /// `stats` is `None`, default stats will be added to the column summary. + fn add_schema_to_table( + mut self, + new_column_schema: Schema, + add_column_summary: bool, + stats: Option, + ) -> Self { // assume the new schema has exactly a single table assert_eq!(new_column_schema.len(), 1); let (col_type, new_field) = new_column_schema.field(0); - let influxdb_type = col_type.map(|t| match t { - InfluxColumnType::IOx(_) => todo!(), - InfluxColumnType::Tag => InfluxDbType::Tag, - InfluxColumnType::Field(_) => InfluxDbType::Field, - InfluxColumnType::Timestamp => InfluxDbType::Timestamp, - }); - - let stats = stats.unwrap_or_else(|| match new_field.data_type() { - DataType::Boolean => Statistics::Bool(StatValues::default()), - DataType::Int64 => Statistics::I64(StatValues::default()), - DataType::UInt64 => Statistics::U64(StatValues::default()), - DataType::Utf8 => Statistics::String(StatValues::default()), - DataType::Dictionary(_, value_type) => { - assert!(matches!(**value_type, DataType::Utf8)); - Statistics::String(StatValues::default()) - } - DataType::Float64 => Statistics::String(StatValues::default()), - DataType::Timestamp(_, _) => Statistics::I64(StatValues::default()), - _ => panic!("Unsupported type in TestChunk: {:?}", new_field.data_type()), - }); - - let column_summary = ColumnSummary { - name: new_field.name().clone(), - influxdb_type, - stats, - }; - let mut merger = SchemaMerger::new(); merger = merger.merge(&new_column_schema).unwrap(); merger = merger @@ -319,7 +300,36 @@ impl TestChunk { .expect("merging was successful"); self.schema = Arc::new(merger.build()); - self.table_summary.columns.push(column_summary); + if add_column_summary { + let influxdb_type = col_type.map(|t| match t { + InfluxColumnType::IOx(_) => todo!(), + InfluxColumnType::Tag => InfluxDbType::Tag, + InfluxColumnType::Field(_) => InfluxDbType::Field, + InfluxColumnType::Timestamp => InfluxDbType::Timestamp, + }); + + let stats = stats.unwrap_or_else(|| match new_field.data_type() { + DataType::Boolean => Statistics::Bool(StatValues::default()), + DataType::Int64 => Statistics::I64(StatValues::default()), + DataType::UInt64 => Statistics::U64(StatValues::default()), + DataType::Utf8 => Statistics::String(StatValues::default()), + DataType::Dictionary(_, value_type) => { + assert!(matches!(**value_type, DataType::Utf8)); + Statistics::String(StatValues::default()) + } + DataType::Float64 => Statistics::String(StatValues::default()), + DataType::Timestamp(_, _) => Statistics::I64(StatValues::default()), + _ => panic!("Unsupported type in TestChunk: {:?}", new_field.data_type()), + }); + + let column_summary = ColumnSummary { + name: new_field.name().clone(), + influxdb_type, + stats, + }; + + self.table_summary.columns.push(column_summary); + } self } From ee545ce90ef9c640a04cda67ed0d4cd1d8b8f835 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 21:28:24 -0400 Subject: [PATCH 10/17] test: Make _with_stats methods able to optionally take max/min Not used yet, but will be when this is unified with query/src/pruning.rs --- query/src/frontend/reorg.rs | 8 ++--- query/src/provider.rs | 68 ++++++++++++++++++------------------- query/src/test.rs | 14 ++++---- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/query/src/frontend/reorg.rs b/query/src/frontend/reorg.rs index f55b251dda..91656408ec 100644 --- a/query/src/frontend/reorg.rs +++ b/query/src/frontend/reorg.rs @@ -271,8 +271,8 @@ mod test { // Chunk 1 with 5 rows of data on 2 tags let chunk1 = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_five_rows_of_data(), ); @@ -280,8 +280,8 @@ mod test { // Chunk 2 has an extra field, and only 4 fields let chunk2 = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_int_field_column("field_int2") .with_four_rows_of_data(), diff --git a/query/src/provider.rs b/query/src/provider.rs index 69a09e4391..1dd832c903 100644 --- a/query/src/provider.rs +++ b/query/src/provider.rs @@ -765,31 +765,31 @@ mod test { // in the duplicate module // c1: no overlaps - let c1 = Arc::new( - TestChunk::new("t") - .with_id(1) - .with_tag_column_with_stats("tag1", "a", "b"), - ); + let c1 = Arc::new(TestChunk::new("t").with_id(1).with_tag_column_with_stats( + "tag1", + Some("a"), + Some("b"), + )); // c2: over lap with c3 - let c2 = Arc::new( - TestChunk::new("t") - .with_id(2) - .with_tag_column_with_stats("tag1", "c", "d"), - ); + let c2 = Arc::new(TestChunk::new("t").with_id(2).with_tag_column_with_stats( + "tag1", + Some("c"), + Some("d"), + )); // c3: overlap with c2 - let c3 = Arc::new( - TestChunk::new("t") - .with_id(3) - .with_tag_column_with_stats("tag1", "c", "d"), - ); + let c3 = Arc::new(TestChunk::new("t").with_id(3).with_tag_column_with_stats( + "tag1", + Some("c"), + Some("d"), + )); // c4: self overlap let c4 = Arc::new( TestChunk::new("t") .with_id(4) - .with_tag_column_with_stats("tag1", "e", "f") + .with_tag_column_with_stats("tag1", Some("e"), Some("f")) .with_may_contain_pk_duplicates(true), ); @@ -1295,8 +1295,8 @@ mod test { // Test no duplicate at all let chunk = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_five_rows_of_data(), ); @@ -1343,8 +1343,8 @@ mod test { // Test one chunk with duplicate within let chunk = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_may_contain_pk_duplicates(true) .with_ten_rows_of_data_some_duplicates(), @@ -1399,8 +1399,8 @@ mod test { // Test one chunk with duplicate within let chunk = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_may_contain_pk_duplicates(true) .with_ten_rows_of_data_some_duplicates(), @@ -1464,16 +1464,16 @@ mod test { // test overlapped chunks let chunk1 = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_ten_rows_of_data_some_duplicates(), ); let chunk2 = Arc::new( TestChunk::new("t") - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_five_rows_of_data(), ); @@ -1534,8 +1534,8 @@ mod test { let chunk1 = Arc::new( TestChunk::new("t") .with_id(1) - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_ten_rows_of_data_some_duplicates(), ); @@ -1544,8 +1544,8 @@ mod test { let chunk2 = Arc::new( TestChunk::new("t") .with_id(2) - .with_time_column_with_stats(5, 7000) - .with_tag_column_with_stats("tag1", "AL", "MT") + .with_time_column_with_stats(Some(5), Some(7000)) + .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) .with_int_field_column("field_int") .with_five_rows_of_data(), ); @@ -1554,8 +1554,8 @@ mod test { let chunk3 = Arc::new( TestChunk::new("t") .with_id(3) - .with_time_column_with_stats(8000, 20000) - .with_tag_column_with_stats("tag1", "UT", "WA") + .with_time_column_with_stats(Some(8000), Some(20000)) + .with_tag_column_with_stats("tag1", Some("UT"), Some("WA")) .with_int_field_column("field_int") .with_three_rows_of_data(), ); @@ -1564,8 +1564,8 @@ mod test { let chunk4 = Arc::new( TestChunk::new("t") .with_id(4) - .with_time_column_with_stats(28000, 220000) - .with_tag_column_with_stats("tag1", "UT", "WA") + .with_time_column_with_stats(Some(28000), Some(220000)) + .with_tag_column_with_stats("tag1", Some("UT"), Some("WA")) .with_int_field_column("field_int") .with_may_contain_pk_duplicates(true) .with_four_rows_of_data(), diff --git a/query/src/test.rs b/query/src/test.rs index 60603d4ffa..e1611afed2 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -223,8 +223,8 @@ impl TestChunk { pub fn with_tag_column_with_stats( self, column_name: impl Into, - min: &str, - max: &str, + min: Option<&str>, + max: Option<&str>, ) -> Self { let column_name = column_name.into(); @@ -234,8 +234,8 @@ impl TestChunk { // Construct stats let stats = Statistics::String(StatValues { - min: Some(min.to_string()), - max: Some(max.to_string()), + min: min.map(ToString::to_string), + max: max.map(ToString::to_string), ..Default::default() }); @@ -252,15 +252,15 @@ impl TestChunk { } /// Register a timestamp column with the test chunk - pub fn with_time_column_with_stats(self, min: i64, max: i64) -> Self { + pub fn with_time_column_with_stats(self, min: Option, max: Option) -> Self { // make a new schema with the specified column and // merge it in to any existing schema let new_column_schema = SchemaBuilder::new().timestamp().build().unwrap(); // Construct stats let stats = Statistics::I64(StatValues { - min: Some(min), - max: Some(max), + min, + max, ..Default::default() }); From 54f7ee8b8db003ad893f506478ce2b1e080c6e68 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 20:41:36 -0400 Subject: [PATCH 11/17] refactor: Implement TestChunkMeta in terms of TestChunk This is a temporary step to make sure TestChunk does everything TestChunkMeta needs --- query/src/pruning.rs | 142 +++++++++++++------------------------- query/src/test.rs | 159 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 204 insertions(+), 97 deletions(-) diff --git a/query/src/pruning.rs b/query/src/pruning.rs index c56b29374d..dcbccae433 100644 --- a/query/src/pruning.rs +++ b/query/src/pruning.rs @@ -168,11 +168,10 @@ mod test { use std::{cell::RefCell, fmt, sync::Arc}; use arrow::datatypes::DataType; - use data_types::partition_metadata::{ColumnSummary, StatValues, Statistics}; use datafusion::logical_plan::{col, lit}; - use internal_types::schema::{builder::SchemaBuilder, merge::SchemaMerger, Schema}; + use internal_types::schema::{builder::SchemaBuilder, Schema}; - use crate::predicate::PredicateBuilder; + use crate::{predicate::PredicateBuilder, test::TestChunk, QueryChunk}; #[test] fn test_empty() { @@ -657,7 +656,7 @@ mod test { } fn names(pruned: &[Arc]) -> Vec<&str> { - pruned.iter().map(|p| p.name.as_str()).collect() + pruned.iter().map(|p| p.test_chunk.table_name()).collect() } #[derive(Debug, Default)] @@ -695,49 +694,15 @@ mod test { } } - #[derive(Debug, Clone)] + #[derive(Debug)] struct TestChunkMeta { - name: String, - summary: TableSummary, - schema: Arc, - } - - /// Implementation of creating a new column with statitics for TestChunkMeta - macro_rules! impl_with_column { - ($SELF:expr, $COLUMN_NAME:expr, $MIN:expr, $MAX:expr, $DATA_TYPE:ident, $STAT_TYPE:ident) => {{ - let Self { - name, - summary, - schema, - } = $SELF; - let column_name = $COLUMN_NAME.into(); - let new_self = Self { - name, - schema: Self::add_field_to_schema(&column_name, schema, DataType::$DATA_TYPE), - summary: Self::add_column_to_summary( - summary, - column_name, - Statistics::$STAT_TYPE(StatValues { - distinct_count: None, - min: $MIN, - max: $MAX, - count: 42, - }), - ), - }; - new_self - }}; + test_chunk: TestChunk, } impl TestChunkMeta { fn new(name: impl Into) -> Self { - let name = name.into(); - let summary = TableSummary::new(&name); - let schema = Arc::new(SchemaBuilder::new().build().unwrap()); Self { - name, - summary, - schema, + test_chunk: TestChunk::new(name), } } @@ -748,7 +713,11 @@ mod test { min: Option, max: Option, ) -> Self { - impl_with_column!(self, column_name, min, max, Float64, F64) + let Self { test_chunk } = self; + + let test_chunk = test_chunk.with_f64_field_column_with_stats(column_name, min, max); + + Self { test_chunk } } /// Adds an i64 column named into the schema @@ -758,22 +727,29 @@ mod test { min: Option, max: Option, ) -> Self { - impl_with_column!(self, column_name, min, max, Int64, I64) + let Self { test_chunk } = self; + + let test_chunk = test_chunk.with_int_field_column_with_stats(column_name, min, max); + + Self { test_chunk } } /// Adds an i64 column named into the schema, but with no stats - fn with_i64_column_no_stats(self, column_name: impl AsRef) -> Self { - let Self { - name, - summary, - schema, - } = self; - Self { - name, - schema: Self::add_field_to_schema(column_name.as_ref(), schema, DataType::Int64), - // Note we don't add any stats - summary, - } + fn with_i64_column_no_stats(self, column_name: impl Into) -> Self { + let Self { test_chunk } = self; + + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Int64) + .build() + .unwrap(); + + let test_chunk = test_chunk.add_schema_to_table(new_column_schema, false, None); + + Self { test_chunk } } /// Adds an u64 column named into the schema @@ -783,7 +759,11 @@ mod test { min: Option, max: Option, ) -> Self { - impl_with_column!(self, column_name, min, max, UInt64, U64) + let Self { test_chunk } = self; + + let test_chunk = test_chunk.with_u64_field_column_with_stats(column_name, min, max); + + Self { test_chunk } } /// Adds bool column named into the schema @@ -793,7 +773,11 @@ mod test { min: Option, max: Option, ) -> Self { - impl_with_column!(self, column_name, min, max, Boolean, Bool) + let Self { test_chunk } = self; + + let test_chunk = test_chunk.with_bool_field_column_with_stats(column_name, min, max); + + Self { test_chunk } } /// Adds a string column named into the schema @@ -803,59 +787,27 @@ mod test { min: Option<&str>, max: Option<&str>, ) -> Self { - let min = min.map(|v| v.to_string()); - let max = max.map(|v| v.to_string()); - impl_with_column!(self, column_name, min, max, Utf8, String) - } + let Self { test_chunk } = self; - fn add_field_to_schema( - column_name: &str, - schema: Arc, - data_type: DataType, - ) -> Arc { - let new_schema = SchemaBuilder::new() - .field(column_name, data_type) - .build() - .expect("built new field schema"); + let test_chunk = test_chunk.with_string_field_column_with_stats(column_name, min, max); - let new_schema = SchemaMerger::new() - .merge(schema.as_ref()) - .expect("merged existing schema") - .merge(&new_schema) - .expect("merged new schema") - .build(); - - Arc::new(new_schema) - } - - fn add_column_to_summary( - mut summary: TableSummary, - column_name: impl Into, - stats: Statistics, - ) -> TableSummary { - summary.columns.push(ColumnSummary { - name: column_name.into(), - influxdb_type: None, - stats, - }); - - summary + Self { test_chunk } } } impl fmt::Display for TestChunkMeta { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.name) + write!(f, "{}", self.test_chunk.table_name()) } } impl QueryChunkMeta for TestChunkMeta { fn summary(&self) -> &TableSummary { - &self.summary + &self.test_chunk.summary() } fn schema(&self) -> Arc { - Arc::clone(&self.schema) + Arc::clone(&self.test_chunk.schema()) } } } diff --git a/query/src/test.rs b/query/src/test.rs index e1611afed2..398670403b 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -280,10 +280,165 @@ impl TestChunk { self.add_schema_to_table(new_column_schema, true, None) } + pub fn with_int_field_column_with_stats( + self, + column_name: impl Into, + min: Option, + max: Option, + ) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Int64) + .build() + .unwrap(); + + // Construct stats + let stats = Statistics::I64(StatValues { + min, + max, + ..Default::default() + }); + + self.add_schema_to_table(new_column_schema, true, Some(stats)) + } + + /// Register a u64 field column with the test chunk with default stats + pub fn with_u64_field_column(self, column_name: impl Into) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::UInt64) + .build() + .unwrap(); + self.add_schema_to_table(new_column_schema, true, None) + } + + pub fn with_u64_field_column_with_stats( + self, + column_name: impl Into, + min: Option, + max: Option, + ) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::UInt64) + .build() + .unwrap(); + + // Construct stats + let stats = Statistics::U64(StatValues { + min, + max, + ..Default::default() + }); + + self.add_schema_to_table(new_column_schema, true, Some(stats)) + } + + /// Register an f64 field column with the test chunk with default stats + pub fn with_f64_field_column(self, column_name: impl Into) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Float64) + .build() + .unwrap(); + + self.add_schema_to_table(new_column_schema, true, None) + } + + /// Register an f64 field column with the test chunk + pub fn with_f64_field_column_with_stats( + self, + column_name: impl Into, + min: Option, + max: Option, + ) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Float64) + .build() + .unwrap(); + + // Construct stats + let stats = Statistics::F64(StatValues { + min, + max, + ..Default::default() + }); + + self.add_schema_to_table(new_column_schema, true, Some(stats)) + } + + /// Register a bool field column with the test chunk + pub fn with_bool_field_column_with_stats( + self, + column_name: impl Into, + min: Option, + max: Option, + ) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Boolean) + .build() + .unwrap(); + + // Construct stats + let stats = Statistics::Bool(StatValues { + min, + max, + ..Default::default() + }); + + self.add_schema_to_table(new_column_schema, true, Some(stats)) + } + + /// Register a string field column with the test chunk + pub fn with_string_field_column_with_stats( + self, + column_name: impl Into, + min: Option<&str>, + max: Option<&str>, + ) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Utf8) + .build() + .unwrap(); + + // Construct stats + let stats = Statistics::String(StatValues { + min: min.map(ToString::to_string), + max: max.map(ToString::to_string), + ..Default::default() + }); + + self.add_schema_to_table(new_column_schema, true, Some(stats)) + } + /// Adds the specified schema and optionally a column summary containing optional stats. /// If `add_column_summary` is false, `stats` is ignored. If `add_column_summary` is true but /// `stats` is `None`, default stats will be added to the column summary. - fn add_schema_to_table( + pub fn add_schema_to_table( mut self, new_column_schema: Schema, add_column_summary: bool, @@ -317,7 +472,7 @@ impl TestChunk { assert!(matches!(**value_type, DataType::Utf8)); Statistics::String(StatValues::default()) } - DataType::Float64 => Statistics::String(StatValues::default()), + DataType::Float64 => Statistics::F64(StatValues::default()), DataType::Timestamp(_, _) => Statistics::I64(StatValues::default()), _ => panic!("Unsupported type in TestChunk: {:?}", new_field.data_type()), }); From b4c5a8708822f4f5129a59e2784257d79e199713 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 21:54:35 -0400 Subject: [PATCH 12/17] refactor: Rename int field to i64 field to be more consistent --- query/src/frontend/reorg.rs | 6 ++-- query/src/provider.rs | 46 ++++++++++++------------ query/src/pruning.rs | 2 +- query/src/test.rs | 4 +-- src/influxdb_ioxd/rpc/storage/service.rs | 4 +-- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/query/src/frontend/reorg.rs b/query/src/frontend/reorg.rs index 91656408ec..5519c5eb3f 100644 --- a/query/src/frontend/reorg.rs +++ b/query/src/frontend/reorg.rs @@ -273,7 +273,7 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -282,8 +282,8 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") - .with_int_field_column("field_int2") + .with_i64_field_column("field_int") + .with_i64_field_column("field_int2") .with_four_rows_of_data(), ); diff --git a/query/src/provider.rs b/query/src/provider.rs index 1dd832c903..f533858128 100644 --- a/query/src/provider.rs +++ b/query/src/provider.rs @@ -813,7 +813,7 @@ mod test { TestChunk::new("t") .with_time_column() .with_tag_column("tag1") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -868,7 +868,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -923,7 +923,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -961,7 +961,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -972,7 +972,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); // Datafusion schema of the chunk @@ -1031,7 +1031,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1042,7 +1042,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); let chunks = vec![chunk1, chunk2]; @@ -1105,7 +1105,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1115,7 +1115,7 @@ mod test { .with_id(2) .with_time_column() .with_tag_column("tag1") - .with_int_field_column("other_field_int") + .with_i64_field_column("other_field_int") .with_five_rows_of_data(), ); @@ -1125,7 +1125,7 @@ mod test { .with_id(3) .with_time_column() .with_tag_column("tag1") - .with_int_field_column("other_field_int") + .with_i64_field_column("other_field_int") .with_five_rows_of_data(), ); @@ -1197,7 +1197,7 @@ mod test { .with_time_column() .with_tag_column("tag1") .with_tag_column("tag2") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1208,7 +1208,7 @@ mod test { .with_time_column() .with_tag_column("tag3") .with_tag_column("tag1") - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1218,8 +1218,8 @@ mod test { .with_id(3) .with_time_column() .with_tag_column("tag3") - .with_int_field_column("field_int") - .with_int_field_column("field_int2") + .with_i64_field_column("field_int") + .with_i64_field_column("field_int2") .with_five_rows_of_data(), ); @@ -1297,7 +1297,7 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1345,7 +1345,7 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_may_contain_pk_duplicates(true) .with_ten_rows_of_data_some_duplicates(), ); @@ -1401,7 +1401,7 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_may_contain_pk_duplicates(true) .with_ten_rows_of_data_some_duplicates(), ); @@ -1466,7 +1466,7 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_ten_rows_of_data_some_duplicates(), ); @@ -1474,7 +1474,7 @@ mod test { TestChunk::new("t") .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1536,7 +1536,7 @@ mod test { .with_id(1) .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_ten_rows_of_data_some_duplicates(), ); @@ -1546,7 +1546,7 @@ mod test { .with_id(2) .with_time_column_with_stats(Some(5), Some(7000)) .with_tag_column_with_stats("tag1", Some("AL"), Some("MT")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_five_rows_of_data(), ); @@ -1556,7 +1556,7 @@ mod test { .with_id(3) .with_time_column_with_stats(Some(8000), Some(20000)) .with_tag_column_with_stats("tag1", Some("UT"), Some("WA")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_three_rows_of_data(), ); @@ -1566,7 +1566,7 @@ mod test { .with_id(4) .with_time_column_with_stats(Some(28000), Some(220000)) .with_tag_column_with_stats("tag1", Some("UT"), Some("WA")) - .with_int_field_column("field_int") + .with_i64_field_column("field_int") .with_may_contain_pk_duplicates(true) .with_four_rows_of_data(), ); diff --git a/query/src/pruning.rs b/query/src/pruning.rs index dcbccae433..8a275d3a1c 100644 --- a/query/src/pruning.rs +++ b/query/src/pruning.rs @@ -729,7 +729,7 @@ mod test { ) -> Self { let Self { test_chunk } = self; - let test_chunk = test_chunk.with_int_field_column_with_stats(column_name, min, max); + let test_chunk = test_chunk.with_i64_field_column_with_stats(column_name, min, max); Self { test_chunk } } diff --git a/query/src/test.rs b/query/src/test.rs index 398670403b..4f60b27129 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -268,7 +268,7 @@ impl TestChunk { } /// Register an int field column with the test chunk with default stats - pub fn with_int_field_column(self, column_name: impl Into) -> Self { + pub fn with_i64_field_column(self, column_name: impl Into) -> Self { let column_name = column_name.into(); // make a new schema with the specified column and @@ -280,7 +280,7 @@ impl TestChunk { self.add_schema_to_table(new_column_schema, true, None) } - pub fn with_int_field_column_with_stats( + pub fn with_i64_field_column_with_stats( self, column_name: impl Into, min: Option, diff --git a/src/influxdb_ioxd/rpc/storage/service.rs b/src/influxdb_ioxd/rpc/storage/service.rs index 53781fceed..cbdca28cda 100644 --- a/src/influxdb_ioxd/rpc/storage/service.rs +++ b/src/influxdb_ioxd/rpc/storage/service.rs @@ -1676,7 +1676,7 @@ mod tests { // Add a chunk with a field let chunk = TestChunk::new("TheMeasurement") - .with_int_field_column("Field1") + .with_i64_field_column("Field1") .with_time_column() .with_tag_column("state") .with_one_row_of_data(); @@ -2350,7 +2350,7 @@ mod tests { // Add a chunk with a field let chunk = TestChunk::new("TheMeasurement") - .with_int_field_column("Field1") + .with_i64_field_column("Field1") .with_time_column() .with_tag_column("state") .with_one_row_of_data(); From 96f9485792535331b91c6c499c88368f3816f80c Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 21:57:55 -0400 Subject: [PATCH 13/17] refactor: Move a with_no_stats method to be entirely defined on TestChunk --- query/src/pruning.rs | 14 ++------------ query/src/test.rs | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/query/src/pruning.rs b/query/src/pruning.rs index 8a275d3a1c..d9afeaaee8 100644 --- a/query/src/pruning.rs +++ b/query/src/pruning.rs @@ -167,9 +167,8 @@ mod test { use super::*; use std::{cell::RefCell, fmt, sync::Arc}; - use arrow::datatypes::DataType; use datafusion::logical_plan::{col, lit}; - use internal_types::schema::{builder::SchemaBuilder, Schema}; + use internal_types::schema::Schema; use crate::{predicate::PredicateBuilder, test::TestChunk, QueryChunk}; @@ -738,16 +737,7 @@ mod test { fn with_i64_column_no_stats(self, column_name: impl Into) -> Self { let Self { test_chunk } = self; - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Int64) - .build() - .unwrap(); - - let test_chunk = test_chunk.add_schema_to_table(new_column_schema, false, None); + let test_chunk = test_chunk.with_i64_field_column_no_stats(column_name); Self { test_chunk } } diff --git a/query/src/test.rs b/query/src/test.rs index 4f60b27129..ac34e3b6ff 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -280,6 +280,20 @@ impl TestChunk { self.add_schema_to_table(new_column_schema, true, None) } + /// Adds an i64 column named into the schema, but with no stats + pub fn with_i64_field_column_no_stats(self, column_name: impl Into) -> Self { + let column_name = column_name.into(); + + // make a new schema with the specified column and + // merge it in to any existing schema + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::Int64) + .build() + .unwrap(); + + self.add_schema_to_table(new_column_schema, false, None) + } + pub fn with_i64_field_column_with_stats( self, column_name: impl Into, From dc0b97e12159e4eafabb42898b17b6b165e18b11 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 22:10:19 -0400 Subject: [PATCH 14/17] refactor: Completely replace TestChunkMeta with TestChunk --- query/src/pruning.rs | 337 ++++++++++++++++++------------------------- query/src/test.rs | 10 +- 2 files changed, 150 insertions(+), 197 deletions(-) diff --git a/query/src/pruning.rs b/query/src/pruning.rs index d9afeaaee8..2f8804383e 100644 --- a/query/src/pruning.rs +++ b/query/src/pruning.rs @@ -165,18 +165,15 @@ impl<'a> PruningStatistics for ChunkMetaStats<'a> { #[cfg(test)] mod test { use super::*; - use std::{cell::RefCell, fmt, sync::Arc}; - - use datafusion::logical_plan::{col, lit}; - use internal_types::schema::Schema; - use crate::{predicate::PredicateBuilder, test::TestChunk, QueryChunk}; + use datafusion::logical_plan::{col, lit}; + use std::{cell::RefCell, sync::Arc}; #[test] fn test_empty() { test_helpers::maybe_start_logging(); let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1")); + let c1 = Arc::new(TestChunk::new("chunk1")); let predicate = PredicateBuilder::new().build(); let pruned = prune_chunks(&observer, vec![c1], &predicate); @@ -194,7 +191,7 @@ mod test { // column1 > 100.0 where // c1: [0.0, 10.0] --> pruned let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_f64_column( + let c1 = Arc::new(TestChunk::new("chunk1").with_f64_field_column_with_stats( "column1", Some(0.0), Some(10.0), @@ -216,8 +213,11 @@ mod test { // c1: [0, 10] --> pruned let observer = TestObserver::new(); - let c1 = - Arc::new(TestChunkMeta::new("chunk1").with_i64_column("column1", Some(0), Some(10))); + let c1 = Arc::new(TestChunk::new("chunk1").with_i64_field_column_with_stats( + "column1", + Some(0), + Some(10), + )); let predicate = PredicateBuilder::new() .add_expr(col("column1").gt(lit(100))) @@ -236,8 +236,11 @@ mod test { // c1: [0, 10] --> pruned let observer = TestObserver::new(); - let c1 = - Arc::new(TestChunkMeta::new("chunk1").with_u64_column("column1", Some(0), Some(10))); + let c1 = Arc::new(TestChunk::new("chunk1").with_u64_field_column_with_stats( + "column1", + Some(0), + Some(10), + )); let predicate = PredicateBuilder::new() .add_expr(col("column1").gt(lit(100))) @@ -256,7 +259,7 @@ mod test { // c1: [false, false] --> pruned let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_bool_column( + let c1 = Arc::new(TestChunk::new("chunk1").with_bool_field_column_with_stats( "column1", Some(false), Some(false), @@ -277,11 +280,13 @@ mod test { // c1: ["a", "q"] --> pruned let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_string_column( - "column1", - Some("a"), - Some("q"), - )); + let c1 = Arc::new( + TestChunk::new("chunk1").with_string_field_column_with_stats( + "column1", + Some("a"), + Some("q"), + ), + ); let predicate = PredicateBuilder::new() .add_expr(col("column1").gt(lit("z"))) @@ -299,7 +304,7 @@ mod test { // column1 < 100.0 where // c1: [0.0, 10.0] --> not pruned let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_f64_column( + let c1 = Arc::new(TestChunk::new("chunk1").with_f64_field_column_with_stats( "column1", Some(0.0), Some(10.0), @@ -321,8 +326,11 @@ mod test { // c1: [0, 10] --> not pruned let observer = TestObserver::new(); - let c1 = - Arc::new(TestChunkMeta::new("chunk1").with_i64_column("column1", Some(0), Some(10))); + let c1 = Arc::new(TestChunk::new("chunk1").with_i64_field_column_with_stats( + "column1", + Some(0), + Some(10), + )); let predicate = PredicateBuilder::new() .add_expr(col("column1").lt(lit(100))) @@ -341,8 +349,11 @@ mod test { // c1: [0, 10] --> not pruned let observer = TestObserver::new(); - let c1 = - Arc::new(TestChunkMeta::new("chunk1").with_u64_column("column1", Some(0), Some(10))); + let c1 = Arc::new(TestChunk::new("chunk1").with_u64_field_column_with_stats( + "column1", + Some(0), + Some(10), + )); let predicate = PredicateBuilder::new() .add_expr(col("column1").lt(lit(100))) @@ -361,7 +372,7 @@ mod test { // c1: [false, true] --> not pruned let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_bool_column( + let c1 = Arc::new(TestChunk::new("chunk1").with_bool_field_column_with_stats( "column1", Some(false), Some(true), @@ -382,11 +393,13 @@ mod test { // c1: ["a", "q"] --> not pruned let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_string_column( - "column1", - Some("a"), - Some("q"), - )); + let c1 = Arc::new( + TestChunk::new("chunk1").with_string_field_column_with_stats( + "column1", + Some("a"), + Some("q"), + ), + ); let predicate = PredicateBuilder::new() .add_expr(col("column1").lt(lit("z"))) @@ -408,13 +421,23 @@ mod test { // c4: Null --> not pruned (no statistics at all) let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_i64_column("column1", None, Some(10))); + let c1 = Arc::new(TestChunk::new("chunk1").with_i64_field_column_with_stats( + "column1", + None, + Some(10), + )); - let c2 = Arc::new(TestChunkMeta::new("chunk2").with_i64_column("column1", Some(0), None)); + let c2 = Arc::new(TestChunk::new("chunk2").with_i64_field_column_with_stats( + "column1", + Some(0), + None, + )); - let c3 = Arc::new(TestChunkMeta::new("chunk3").with_i64_column("column1", None, None)); + let c3 = Arc::new( + TestChunk::new("chunk3").with_i64_field_column_with_stats("column1", None, None), + ); - let c4 = Arc::new(TestChunkMeta::new("chunk4").with_i64_column_no_stats("column1")); + let c4 = Arc::new(TestChunk::new("chunk4").with_i64_field_column_no_stats("column1")); let predicate = PredicateBuilder::new() .add_expr(col("column1").gt(lit(100))) @@ -438,20 +461,39 @@ mod test { // c6: [None, 10] --> pruned let observer = TestObserver::new(); - let c1 = - Arc::new(TestChunkMeta::new("chunk1").with_i64_column("column1", Some(0), Some(10))); + let c1 = Arc::new(TestChunk::new("chunk1").with_i64_field_column_with_stats( + "column1", + Some(0), + Some(10), + )); - let c2 = - Arc::new(TestChunkMeta::new("chunk2").with_i64_column("column1", Some(0), Some(1000))); + let c2 = Arc::new(TestChunk::new("chunk2").with_i64_field_column_with_stats( + "column1", + Some(0), + Some(1000), + )); - let c3 = - Arc::new(TestChunkMeta::new("chunk3").with_i64_column("column1", Some(10), Some(20))); + let c3 = Arc::new(TestChunk::new("chunk3").with_i64_field_column_with_stats( + "column1", + Some(10), + Some(20), + )); - let c4 = Arc::new(TestChunkMeta::new("chunk4").with_i64_column("column1", None, None)); + let c4 = Arc::new( + TestChunk::new("chunk4").with_i64_field_column_with_stats("column1", None, None), + ); - let c5 = Arc::new(TestChunkMeta::new("chunk5").with_i64_column("column1", Some(10), None)); + let c5 = Arc::new(TestChunk::new("chunk5").with_i64_field_column_with_stats( + "column1", + Some(10), + None, + )); - let c6 = Arc::new(TestChunkMeta::new("chunk6").with_i64_column("column1", None, Some(20))); + let c6 = Arc::new(TestChunk::new("chunk6").with_i64_field_column_with_stats( + "column1", + None, + Some(20), + )); let predicate = PredicateBuilder::new() .add_expr(col("column1").gt(lit(100))) @@ -475,19 +517,22 @@ mod test { // c3: None, column2 [0, 4] --> not pruned (no stats for column1) let observer = TestObserver::new(); let c1 = Arc::new( - TestChunkMeta::new("chunk1") - .with_i64_column("column1", Some(0), Some(100)) - .with_i64_column("column2", Some(0), Some(4)), + TestChunk::new("chunk1") + .with_i64_field_column_with_stats("column1", Some(0), Some(100)) + .with_i64_field_column_with_stats("column2", Some(0), Some(4)), ); let c2 = Arc::new( - TestChunkMeta::new("chunk2") - .with_i64_column("column1", Some(0), Some(1000)) - .with_i64_column("column2", Some(0), Some(4)), + TestChunk::new("chunk2") + .with_i64_field_column_with_stats("column1", Some(0), Some(1000)) + .with_i64_field_column_with_stats("column2", Some(0), Some(4)), ); - let c3 = - Arc::new(TestChunkMeta::new("chunk3").with_i64_column("column2", Some(0), Some(4))); + let c3 = Arc::new(TestChunk::new("chunk3").with_i64_field_column_with_stats( + "column2", + Some(0), + Some(4), + )); let predicate = PredicateBuilder::new() .add_expr(col("column1").gt(lit(100))) @@ -518,39 +563,39 @@ mod test { let observer = TestObserver::new(); let c1 = Arc::new( - TestChunkMeta::new("chunk1") - .with_i64_column("column1", Some(0), Some(1000)) - .with_i64_column("column2", Some(0), Some(4)), + TestChunk::new("chunk1") + .with_i64_field_column_with_stats("column1", Some(0), Some(1000)) + .with_i64_field_column_with_stats("column2", Some(0), Some(4)), ); let c2 = Arc::new( - TestChunkMeta::new("chunk2") - .with_i64_column("column1", Some(0), Some(10)) - .with_i64_column("column2", Some(0), Some(4)), + TestChunk::new("chunk2") + .with_i64_field_column_with_stats("column1", Some(0), Some(10)) + .with_i64_field_column_with_stats("column2", Some(0), Some(4)), ); let c3 = Arc::new( - TestChunkMeta::new("chunk3") - .with_i64_column("column1", Some(0), Some(10)) - .with_i64_column("column2", Some(5), Some(10)), + TestChunk::new("chunk3") + .with_i64_field_column_with_stats("column1", Some(0), Some(10)) + .with_i64_field_column_with_stats("column2", Some(5), Some(10)), ); let c4 = Arc::new( - TestChunkMeta::new("chunk4") - .with_i64_column("column1", Some(1000), Some(2000)) - .with_i64_column("column2", Some(0), Some(4)), + TestChunk::new("chunk4") + .with_i64_field_column_with_stats("column1", Some(1000), Some(2000)) + .with_i64_field_column_with_stats("column2", Some(0), Some(4)), ); let c5 = Arc::new( - TestChunkMeta::new("chunk5") - .with_i64_column("column1", Some(0), Some(10)) - .with_i64_column_no_stats("column2"), + TestChunk::new("chunk5") + .with_i64_field_column_with_stats("column1", Some(0), Some(10)) + .with_i64_field_column_no_stats("column2"), ); let c6 = Arc::new( - TestChunkMeta::new("chunk6") - .with_i64_column_no_stats("column1") - .with_i64_column("column2", Some(0), Some(4)), + TestChunk::new("chunk6") + .with_i64_field_column_no_stats("column1") + .with_i64_field_column_with_stats("column2", Some(0), Some(4)), ); let predicate = PredicateBuilder::new() @@ -578,19 +623,23 @@ mod test { // c3: column1 [1000, 2000] --> pruned (types are correct) let observer = TestObserver::new(); - let c1 = Arc::new(TestChunkMeta::new("chunk1").with_string_column( - "column1", - Some("0"), - Some("9"), - )); + let c1 = Arc::new( + TestChunk::new("chunk1").with_string_field_column_with_stats( + "column1", + Some("0"), + Some("9"), + ), + ); - let c2 = Arc::new(TestChunkMeta::new("chunk2").with_string_column( - "column1", - Some("1000"), - Some("2000"), - )); + let c2 = Arc::new( + TestChunk::new("chunk2").with_string_field_column_with_stats( + "column1", + Some("1000"), + Some("2000"), + ), + ); - let c3 = Arc::new(TestChunkMeta::new("chunk3").with_i64_column( + let c3 = Arc::new(TestChunk::new("chunk3").with_i64_field_column_with_stats( "column1", Some(1000), Some(2000), @@ -626,19 +675,25 @@ mod test { // c4: column1 [1000u64, 2000u64] --> pruned (types are different) let observer = TestObserver::new(); - let c1 = - Arc::new(TestChunkMeta::new("chunk1").with_i64_column("column1", Some(0), Some(1000))); + let c1 = Arc::new(TestChunk::new("chunk1").with_i64_field_column_with_stats( + "column1", + Some(0), + Some(1000), + )); - let c2 = - Arc::new(TestChunkMeta::new("chunk2").with_u64_column("column1", Some(0), Some(1000))); + let c2 = Arc::new(TestChunk::new("chunk2").with_u64_field_column_with_stats( + "column1", + Some(0), + Some(1000), + )); - let c3 = Arc::new(TestChunkMeta::new("chunk3").with_i64_column( + let c3 = Arc::new(TestChunk::new("chunk3").with_i64_field_column_with_stats( "column1", Some(1000), Some(2000), )); - let c4 = Arc::new(TestChunkMeta::new("chunk4").with_u64_column( + let c4 = Arc::new(TestChunk::new("chunk4").with_u64_field_column_with_stats( "column1", Some(1000), Some(2000), @@ -654,8 +709,8 @@ mod test { assert_eq!(names(&pruned), vec!["chunk1", "chunk2"]); } - fn names(pruned: &[Arc]) -> Vec<&str> { - pruned.iter().map(|p| p.test_chunk.table_name()).collect() + fn names(pruned: &[Arc]) -> Vec<&str> { + pruned.iter().map(|p| p.table_name()).collect() } #[derive(Debug, Default)] @@ -674,7 +729,7 @@ mod test { } impl PruningObserver for TestObserver { - type Observed = TestChunkMeta; + type Observed = TestChunk; fn was_pruned(&self, chunk: &Self::Observed) { self.events.borrow_mut().push(format!("{}: Pruned", chunk)) @@ -692,112 +747,4 @@ mod test { .push(format!("{}: Could not prune chunk: {}", chunk, reason)) } } - - #[derive(Debug)] - struct TestChunkMeta { - test_chunk: TestChunk, - } - - impl TestChunkMeta { - fn new(name: impl Into) -> Self { - Self { - test_chunk: TestChunk::new(name), - } - } - - /// Adds an f64 column named into the schema - fn with_f64_column( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let Self { test_chunk } = self; - - let test_chunk = test_chunk.with_f64_field_column_with_stats(column_name, min, max); - - Self { test_chunk } - } - - /// Adds an i64 column named into the schema - fn with_i64_column( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let Self { test_chunk } = self; - - let test_chunk = test_chunk.with_i64_field_column_with_stats(column_name, min, max); - - Self { test_chunk } - } - - /// Adds an i64 column named into the schema, but with no stats - fn with_i64_column_no_stats(self, column_name: impl Into) -> Self { - let Self { test_chunk } = self; - - let test_chunk = test_chunk.with_i64_field_column_no_stats(column_name); - - Self { test_chunk } - } - - /// Adds an u64 column named into the schema - fn with_u64_column( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let Self { test_chunk } = self; - - let test_chunk = test_chunk.with_u64_field_column_with_stats(column_name, min, max); - - Self { test_chunk } - } - - /// Adds bool column named into the schema - fn with_bool_column( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let Self { test_chunk } = self; - - let test_chunk = test_chunk.with_bool_field_column_with_stats(column_name, min, max); - - Self { test_chunk } - } - - /// Adds a string column named into the schema - fn with_string_column( - self, - column_name: impl Into, - min: Option<&str>, - max: Option<&str>, - ) -> Self { - let Self { test_chunk } = self; - - let test_chunk = test_chunk.with_string_field_column_with_stats(column_name, min, max); - - Self { test_chunk } - } - } - - impl fmt::Display for TestChunkMeta { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.test_chunk.table_name()) - } - } - - impl QueryChunkMeta for TestChunkMeta { - fn summary(&self) -> &TableSummary { - &self.test_chunk.summary() - } - - fn schema(&self) -> Arc { - Arc::clone(&self.test_chunk.schema()) - } - } } diff --git a/query/src/test.rs b/query/src/test.rs index ac34e3b6ff..3e0a7c36d4 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -26,7 +26,7 @@ use internal_types::{ }; use parking_lot::Mutex; use snafu::Snafu; -use std::{collections::BTreeMap, sync::Arc}; +use std::{collections::BTreeMap, fmt, sync::Arc}; #[derive(Debug, Default)] pub struct TestDatabase { @@ -452,7 +452,7 @@ impl TestChunk { /// Adds the specified schema and optionally a column summary containing optional stats. /// If `add_column_summary` is false, `stats` is ignored. If `add_column_summary` is true but /// `stats` is `None`, default stats will be added to the column summary. - pub fn add_schema_to_table( + fn add_schema_to_table( mut self, new_column_schema: Schema, add_column_summary: bool, @@ -828,6 +828,12 @@ impl TestChunk { } } +impl fmt::Display for TestChunk { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.table_name()) + } +} + impl QueryChunk for TestChunk { type Error = TestError; From 1698edcc39519a91b83e08dd250e7e30df40d6e8 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 22:23:44 -0400 Subject: [PATCH 15/17] refactor: Implement query::provider::overlap::TestChunk in terms of query::test::TestChunk --- query/src/provider/overlap.rs | 102 ++++++++++------------------------ 1 file changed, 28 insertions(+), 74 deletions(-) diff --git a/query/src/provider/overlap.rs b/query/src/provider/overlap.rs index f1e001a413..2360712b04 100644 --- a/query/src/provider/overlap.rs +++ b/query/src/provider/overlap.rs @@ -276,13 +276,11 @@ where #[cfg(test)] mod test { - use data_types::partition_metadata::{ - ColumnSummary, InfluxDbType, StatValues, Statistics, TableSummary, - }; - use internal_types::schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME}; - use std::sync::Arc; - use super::*; + use crate::{test::TestChunk as TestChunkInner, QueryChunk}; + use data_types::partition_metadata::TableSummary; + use internal_types::schema::Schema; + use std::sync::Arc; #[macro_export] macro_rules! assert_groups_eq { @@ -569,115 +567,71 @@ mod test { fn to_string(groups: Vec>) -> Vec { let mut s = vec![]; for (idx, group) in groups.iter().enumerate() { - let names = group.iter().map(|c| c.name.as_str()).collect::>(); + let names = group + .iter() + .map(|c| c.test_chunk_inner.table_name()) + .collect::>(); s.push(format!("Group {}: [{}]", idx, names.join(", "))); } s } /// Mocked out prunable provider to use testing overlaps - #[derive(Debug, Clone)] + #[derive(Debug)] struct TestChunk { - // The name of this chunk - name: String, - summary: TableSummary, - builder: SchemaBuilder, - } - - /// Implementation of creating a new column with statitics for TestChunkMeta - macro_rules! make_stats { - ($MIN:expr, $MAX:expr, $STAT_TYPE:ident) => {{ - Statistics::$STAT_TYPE(StatValues { - distinct_count: None, - min: $MIN, - max: $MAX, - count: 42, - }) - }}; + test_chunk_inner: TestChunkInner, } impl TestChunk { /// Create a new TestChunk with a specified name fn new(name: impl Into) -> Self { - let name = name.into(); - let summary = TableSummary::new(name.clone()); - let builder = SchemaBuilder::new(); Self { - name, - summary, - builder, + test_chunk_inner: TestChunkInner::new(name), } } /// Adds a tag column with the specified min/max values - fn with_tag( - mut self, - name: impl Into, - min: Option<&str>, - max: Option<&str>, - ) -> Self { - let min = min.map(|v| v.to_string()); - let max = max.map(|v| v.to_string()); + fn with_tag(self, name: impl Into, min: Option<&str>, max: Option<&str>) -> Self { + let Self { test_chunk_inner } = self; - let tag_name = name.into(); - self.builder.tag(&tag_name); + let test_chunk_inner = test_chunk_inner.with_tag_column_with_stats(name, min, max); - self.summary.columns.push(ColumnSummary { - name: tag_name, - influxdb_type: Some(InfluxDbType::Tag), - stats: make_stats!(min, max, String), - }); - self + Self { test_chunk_inner } } /// Adds a timestamp column with the specified min/max values - fn with_timestamp(mut self, min: i64, max: i64) -> Self { - self.builder.timestamp(); + fn with_timestamp(self, min: i64, max: i64) -> Self { + let Self { test_chunk_inner } = self; - let min = Some(min); - let max = Some(max); + let test_chunk_inner = + test_chunk_inner.with_time_column_with_stats(Some(min), Some(max)); - self.summary.columns.push(ColumnSummary { - name: TIME_COLUMN_NAME.into(), - influxdb_type: Some(InfluxDbType::Timestamp), - stats: make_stats!(min, max, I64), - }); - self + Self { test_chunk_inner } } /// Adds an I64 field column with the specified min/max values fn with_int_field( - mut self, + self, name: impl Into, min: Option, max: Option, ) -> Self { - let field_name = name.into(); - self.builder - .field(&field_name, arrow::datatypes::DataType::Int64); + let Self { test_chunk_inner } = self; - self.summary.columns.push(ColumnSummary { - name: field_name, - influxdb_type: Some(InfluxDbType::Field), - stats: make_stats!(min, max, I64), - }); - self + let test_chunk_inner = + test_chunk_inner.with_i64_field_column_with_stats(name, min, max); + + Self { test_chunk_inner } } } impl QueryChunkMeta for TestChunk { fn summary(&self) -> &TableSummary { - &self.summary + &self.test_chunk_inner.summary() } fn schema(&self) -> Arc { - let schema = self - .builder - // need to clone because `build` resets builder state - .clone() - .build() - .expect("created schema"); - Arc::new(schema) + Arc::clone(&self.test_chunk_inner.schema()) } } } From 4e53a3292835d1fd8cb878fc7640d03335c7ccd8 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sat, 10 Jul 2021 22:31:06 -0400 Subject: [PATCH 16/17] refactor: Completely replace query::provider::overlap::TestChunk with query::test::TestChunk --- query/src/provider/overlap.rs | 219 ++++++++++++++-------------------- 1 file changed, 87 insertions(+), 132 deletions(-) diff --git a/query/src/provider/overlap.rs b/query/src/provider/overlap.rs index 2360712b04..bf93f39074 100644 --- a/query/src/provider/overlap.rs +++ b/query/src/provider/overlap.rs @@ -277,10 +277,7 @@ where #[cfg(test)] mod test { use super::*; - use crate::{test::TestChunk as TestChunkInner, QueryChunk}; - use data_types::partition_metadata::TableSummary; - use internal_types::schema::Schema; - use std::sync::Arc; + use crate::{test::TestChunk, QueryChunk}; #[macro_export] macro_rules! assert_groups_eq { @@ -302,9 +299,17 @@ mod test { #[test] fn one_column_no_overlap() { - let c1 = TestChunk::new("chunk1").with_tag("tag1", Some("boston"), Some("mumbai")); + let c1 = TestChunk::new("chunk1").with_tag_column_with_stats( + "tag1", + Some("boston"), + Some("mumbai"), + ); - let c2 = TestChunk::new("chunk2").with_tag("tag1", Some("new york"), Some("zoo york")); + let c2 = TestChunk::new("chunk2").with_tag_column_with_stats( + "tag1", + Some("new york"), + Some("zoo york"), + ); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -314,9 +319,17 @@ mod test { #[test] fn one_column_overlap() { - let c1 = TestChunk::new("chunk1").with_tag("tag1", Some("boston"), Some("new york")); + let c1 = TestChunk::new("chunk1").with_tag_column_with_stats( + "tag1", + Some("boston"), + Some("new york"), + ); - let c2 = TestChunk::new("chunk2").with_tag("tag1", Some("denver"), Some("zoo york")); + let c2 = TestChunk::new("chunk2").with_tag_column_with_stats( + "tag1", + Some("denver"), + Some("zoo york"), + ); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -326,26 +339,24 @@ mod test { #[test] fn multi_columns() { - let c1 = TestChunk::new("chunk1").with_timestamp(0, 1000).with_tag( - "tag1", - Some("boston"), - Some("new york"), - ); + let c1 = TestChunk::new("chunk1") + .with_time_column_with_stats(Some(0), Some(1000)) + .with_tag_column_with_stats("tag1", Some("boston"), Some("new york")); // Overlaps in tag1, but not in time let c2 = TestChunk::new("chunk2") - .with_tag("tag1", Some("denver"), Some("zoo york")) - .with_timestamp(2000, 3000); + .with_tag_column_with_stats("tag1", Some("denver"), Some("zoo york")) + .with_time_column_with_stats(Some(2000), Some(3000)); // Overlaps in time, but not in tag1 let c3 = TestChunk::new("chunk3") - .with_tag("tag1", Some("zzx"), Some("zzy")) - .with_timestamp(500, 1500); + .with_tag_column_with_stats("tag1", Some("zzx"), Some("zzy")) + .with_time_column_with_stats(Some(500), Some(1500)); // Overlaps in time, and in tag1 let c4 = TestChunk::new("chunk4") - .with_tag("tag1", Some("aaa"), Some("zzz")) - .with_timestamp(500, 1500); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("zzz")) + .with_time_column_with_stats(Some(500), Some(1500)); let groups = group_potential_duplicates(vec![c1, c2, c3, c4]).expect("grouping succeeded"); @@ -360,8 +371,10 @@ mod test { #[test] fn boundary() { // check that overlap calculations include the bound - let c1 = TestChunk::new("chunk1").with_tag("tag1", Some("aaa"), Some("bbb")); - let c2 = TestChunk::new("chunk2").with_tag("tag1", Some("bbb"), Some("ccc")); + let c1 = + TestChunk::new("chunk1").with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")); + let c2 = + TestChunk::new("chunk2").with_tag_column_with_stats("tag1", Some("bbb"), Some("ccc")); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -372,8 +385,10 @@ mod test { #[test] fn same() { // check that if chunks overlap exactly on the boundaries they are still grouped - let c1 = TestChunk::new("chunk1").with_tag("tag1", Some("aaa"), Some("bbb")); - let c2 = TestChunk::new("chunk2").with_tag("tag1", Some("aaa"), Some("bbb")); + let c1 = + TestChunk::new("chunk1").with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")); + let c2 = + TestChunk::new("chunk2").with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -384,8 +399,10 @@ mod test { #[test] fn different_tag_names() { // check that if chunks overlap but in different tag names - let c1 = TestChunk::new("chunk1").with_tag("tag1", Some("aaa"), Some("bbb")); - let c2 = TestChunk::new("chunk2").with_tag("tag2", Some("aaa"), Some("bbb")); + let c1 = + TestChunk::new("chunk1").with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")); + let c2 = + TestChunk::new("chunk2").with_tag_column_with_stats("tag2", Some("aaa"), Some("bbb")); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -399,12 +416,12 @@ mod test { fn different_tag_names_multi_tags() { // check that if chunks overlap but in different tag names let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("aaa"), Some("bbb")); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("aaa"), Some("bbb")); let c2 = TestChunk::new("chunk2") - .with_tag("tag2", Some("aaa"), Some("bbb")) - .with_tag("tag3", Some("aaa"), Some("bbb")); + .with_tag_column_with_stats("tag2", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag3", Some("aaa"), Some("bbb")); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -416,21 +433,21 @@ mod test { #[test] fn three_column() { let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("xxx"), Some("yyy")) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("xxx"), Some("yyy")) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("xxx"), Some("yyy")) + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("xxx"), Some("yyy")) // Timestamp doesn't overlap, but the two tags do - .with_timestamp(2001, 3000); + .with_time_column_with_stats(Some(2001), Some(3000)); let c3 = TestChunk::new("chunk3") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("aaa"), Some("zzz")) + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("aaa"), Some("zzz")) // all three overlap - .with_timestamp(1000, 2000); + .with_time_column_with_stats(Some(1000), Some(2000)); let groups = group_potential_duplicates(vec![c1, c2, c3]).expect("grouping succeeded"); @@ -441,15 +458,15 @@ mod test { #[test] fn tag_order() { let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("xxx"), Some("yyy")) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("xxx"), Some("yyy")) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") - .with_tag("tag2", Some("aaa"), Some("zzz")) - .with_tag("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("aaa"), Some("zzz")) + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) // all three overlap, but tags in different order - .with_timestamp(500, 1000); + .with_time_column_with_stats(Some(500), Some(1000)); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -460,15 +477,15 @@ mod test { #[test] fn tag_order_no_tags() { let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("xxx"), Some("yyy")) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("xxx"), Some("yyy")) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") // tag1 and timestamp overlap, but no tag2 (aka it is all null) // so it could overlap if there was a null tag2 value in chunk1 - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_timestamp(500, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_time_column_with_stats(Some(500), Some(1000)); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -479,16 +496,16 @@ mod test { #[test] fn tag_order_null_stats() { let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", Some("xxx"), Some("yyy")) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", Some("xxx"), Some("yyy")) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") // tag1 and timestamp overlap, tag2 has no stats (is all null) // so they might overlap if chunk1 had a null in tag 2 - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_tag("tag2", None, None) - .with_timestamp(500, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_tag_column_with_stats("tag2", None, None) + .with_time_column_with_stats(Some(500), Some(1000)); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -499,13 +516,13 @@ mod test { #[test] fn tag_order_partial_stats() { let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") // tag1 has a min but not a max. Should result in error - .with_tag("tag1", Some("aaa"), None) - .with_timestamp(500, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), None) + .with_time_column_with_stats(Some(500), Some(1000)); let result = group_potential_duplicates(vec![c1, c2]).unwrap_err(); @@ -523,16 +540,16 @@ mod test { #[test] fn tag_fields_not_counted() { let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_int_field("field", Some(0), Some(2)) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_i64_field_column_with_stats("field", Some(0), Some(2)) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") // tag1 and timestamp overlap, but field value does not // should still overlap - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_int_field("field", Some(100), Some(200)) - .with_timestamp(500, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_i64_field_column_with_stats("field", Some(100), Some(200)) + .with_time_column_with_stats(Some(500), Some(1000)); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -546,15 +563,15 @@ mod test { // chunks; this will likely cause errors elsewhere in practice // as the schemas are incompatible (and can't be merged) let c1 = TestChunk::new("chunk1") - .with_tag("tag1", Some("aaa"), Some("bbb")) - .with_timestamp(0, 1000); + .with_tag_column_with_stats("tag1", Some("aaa"), Some("bbb")) + .with_time_column_with_stats(Some(0), Some(1000)); let c2 = TestChunk::new("chunk2") // tag1 column is actually a field is different in chunk // 2, so since the timestamps overlap these chunks // might also have duplicates (if tag1 was null in c1) - .with_int_field("tag1", Some(100), Some(200)) - .with_timestamp(0, 1000); + .with_i64_field_column_with_stats("tag1", Some(100), Some(200)) + .with_time_column_with_stats(Some(0), Some(1000)); let groups = group_potential_duplicates(vec![c1, c2]).expect("grouping succeeded"); @@ -567,71 +584,9 @@ mod test { fn to_string(groups: Vec>) -> Vec { let mut s = vec![]; for (idx, group) in groups.iter().enumerate() { - let names = group - .iter() - .map(|c| c.test_chunk_inner.table_name()) - .collect::>(); + let names = group.iter().map(|c| c.table_name()).collect::>(); s.push(format!("Group {}: [{}]", idx, names.join(", "))); } s } - - /// Mocked out prunable provider to use testing overlaps - #[derive(Debug)] - struct TestChunk { - test_chunk_inner: TestChunkInner, - } - - impl TestChunk { - /// Create a new TestChunk with a specified name - fn new(name: impl Into) -> Self { - Self { - test_chunk_inner: TestChunkInner::new(name), - } - } - - /// Adds a tag column with the specified min/max values - fn with_tag(self, name: impl Into, min: Option<&str>, max: Option<&str>) -> Self { - let Self { test_chunk_inner } = self; - - let test_chunk_inner = test_chunk_inner.with_tag_column_with_stats(name, min, max); - - Self { test_chunk_inner } - } - - /// Adds a timestamp column with the specified min/max values - fn with_timestamp(self, min: i64, max: i64) -> Self { - let Self { test_chunk_inner } = self; - - let test_chunk_inner = - test_chunk_inner.with_time_column_with_stats(Some(min), Some(max)); - - Self { test_chunk_inner } - } - - /// Adds an I64 field column with the specified min/max values - fn with_int_field( - self, - name: impl Into, - min: Option, - max: Option, - ) -> Self { - let Self { test_chunk_inner } = self; - - let test_chunk_inner = - test_chunk_inner.with_i64_field_column_with_stats(name, min, max); - - Self { test_chunk_inner } - } - } - - impl QueryChunkMeta for TestChunk { - fn summary(&self) -> &TableSummary { - &self.test_chunk_inner.summary() - } - - fn schema(&self) -> Arc { - Arc::clone(&self.test_chunk_inner.schema()) - } - } } From c681da1031241730e2fd286af5ca5b6ad2ee3d2f Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Sun, 11 Jul 2021 21:17:29 -0400 Subject: [PATCH 17/17] refactor: Define the TestChunk methods with macros --- query/src/test.rs | 222 +++++++++++++++------------------------------- 1 file changed, 70 insertions(+), 152 deletions(-) diff --git a/query/src/test.rs b/query/src/test.rs index 3e0a7c36d4..b8c2be37ac 100644 --- a/query/src/test.rs +++ b/query/src/test.rs @@ -159,6 +159,64 @@ pub struct TestChunk { predicate_match: Option, } +/// Implements a method for adding a column with default stats +macro_rules! impl_with_column { + ($NAME:ident, $DATA_TYPE:ident) => { + pub fn $NAME(self, column_name: impl Into) -> Self { + let column_name = column_name.into(); + + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::$DATA_TYPE) + .build() + .unwrap(); + self.add_schema_to_table(new_column_schema, true, None) + } + }; +} + +/// Implements a method for adding a column without any stats +macro_rules! impl_with_column_no_stats { + ($NAME:ident, $DATA_TYPE:ident) => { + pub fn $NAME(self, column_name: impl Into) -> Self { + let column_name = column_name.into(); + + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::$DATA_TYPE) + .build() + .unwrap(); + + self.add_schema_to_table(new_column_schema, false, None) + } + }; +} + +/// Implements a method for adding a column with stats that have the specified min and max +macro_rules! impl_with_column_with_stats { + ($NAME:ident, $DATA_TYPE:ident, $RUST_TYPE:ty, $STAT_TYPE:ident) => { + pub fn $NAME( + self, + column_name: impl Into, + min: Option<$RUST_TYPE>, + max: Option<$RUST_TYPE>, + ) -> Self { + let column_name = column_name.into(); + + let new_column_schema = SchemaBuilder::new() + .field(&column_name, DataType::$DATA_TYPE) + .build() + .unwrap(); + + let stats = Statistics::$STAT_TYPE(StatValues { + min, + max, + ..Default::default() + }); + + self.add_schema_to_table(new_column_schema, true, Some(stats)) + } + }; +} + impl TestChunk { pub fn new(table_name: impl Into) -> Self { let table_name = table_name.into(); @@ -267,161 +325,21 @@ impl TestChunk { self.add_schema_to_table(new_column_schema, true, Some(stats)) } - /// Register an int field column with the test chunk with default stats - pub fn with_i64_field_column(self, column_name: impl Into) -> Self { - let column_name = column_name.into(); + impl_with_column!(with_i64_field_column, Int64); + impl_with_column_no_stats!(with_i64_field_column_no_stats, Int64); + impl_with_column_with_stats!(with_i64_field_column_with_stats, Int64, i64, I64); - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Int64) - .build() - .unwrap(); - self.add_schema_to_table(new_column_schema, true, None) - } + impl_with_column!(with_u64_column, UInt64); + impl_with_column_no_stats!(with_u64_field_column_no_stats, UInt64); + impl_with_column_with_stats!(with_u64_field_column_with_stats, UInt64, u64, U64); - /// Adds an i64 column named into the schema, but with no stats - pub fn with_i64_field_column_no_stats(self, column_name: impl Into) -> Self { - let column_name = column_name.into(); + impl_with_column!(with_f64_field_column, Float64); + impl_with_column_no_stats!(with_f64_field_column_no_stats, Float64); + impl_with_column_with_stats!(with_f64_field_column_with_stats, Float64, f64, F64); - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Int64) - .build() - .unwrap(); - - self.add_schema_to_table(new_column_schema, false, None) - } - - pub fn with_i64_field_column_with_stats( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Int64) - .build() - .unwrap(); - - // Construct stats - let stats = Statistics::I64(StatValues { - min, - max, - ..Default::default() - }); - - self.add_schema_to_table(new_column_schema, true, Some(stats)) - } - - /// Register a u64 field column with the test chunk with default stats - pub fn with_u64_field_column(self, column_name: impl Into) -> Self { - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::UInt64) - .build() - .unwrap(); - self.add_schema_to_table(new_column_schema, true, None) - } - - pub fn with_u64_field_column_with_stats( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::UInt64) - .build() - .unwrap(); - - // Construct stats - let stats = Statistics::U64(StatValues { - min, - max, - ..Default::default() - }); - - self.add_schema_to_table(new_column_schema, true, Some(stats)) - } - - /// Register an f64 field column with the test chunk with default stats - pub fn with_f64_field_column(self, column_name: impl Into) -> Self { - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Float64) - .build() - .unwrap(); - - self.add_schema_to_table(new_column_schema, true, None) - } - - /// Register an f64 field column with the test chunk - pub fn with_f64_field_column_with_stats( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Float64) - .build() - .unwrap(); - - // Construct stats - let stats = Statistics::F64(StatValues { - min, - max, - ..Default::default() - }); - - self.add_schema_to_table(new_column_schema, true, Some(stats)) - } - - /// Register a bool field column with the test chunk - pub fn with_bool_field_column_with_stats( - self, - column_name: impl Into, - min: Option, - max: Option, - ) -> Self { - let column_name = column_name.into(); - - // make a new schema with the specified column and - // merge it in to any existing schema - let new_column_schema = SchemaBuilder::new() - .field(&column_name, DataType::Boolean) - .build() - .unwrap(); - - // Construct stats - let stats = Statistics::Bool(StatValues { - min, - max, - ..Default::default() - }); - - self.add_schema_to_table(new_column_schema, true, Some(stats)) - } + impl_with_column!(with_bool_field_column, Boolean); + impl_with_column_no_stats!(with_bool_field_column_no_stats, Boolean); + impl_with_column_with_stats!(with_bool_field_column_with_stats, Boolean, bool, Bool); /// Register a string field column with the test chunk pub fn with_string_field_column_with_stats(