From 78d3749af50d392a861ebb6e1b82ce87ac3db60d Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 12 Aug 2021 10:45:14 +0100 Subject: [PATCH 1/6] feat: size dictionary encoding by allocated space --- read_buffer/src/column/encoding/string.rs | 2 +- .../src/column/encoding/string/dictionary.rs | 65 +++++++++++++------ read_buffer/src/column/string.rs | 2 +- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/read_buffer/src/column/encoding/string.rs b/read_buffer/src/column/encoding/string.rs index 85975c9980..42574e669e 100644 --- a/read_buffer/src/column/encoding/string.rs +++ b/read_buffer/src/column/encoding/string.rs @@ -31,7 +31,7 @@ impl Encoding { pub fn size(&self) -> usize { match &self { Self::RLE(enc) => enc.size(), - Self::Plain(enc) => enc.size(), + Self::Plain(enc) => enc.size(false), } } diff --git a/read_buffer/src/column/encoding/string/dictionary.rs b/read_buffer/src/column/encoding/string/dictionary.rs index fdb29a9141..d5c418cdf9 100644 --- a/read_buffer/src/column/encoding/string/dictionary.rs +++ b/read_buffer/src/column/encoding/string/dictionary.rs @@ -47,7 +47,7 @@ impl Default for Dictionary { } impl Dictionary { - /// Initialises an Dictionar encoding with a set of logical values. + /// Initialises a Dictionary encoding with a set of logical values. /// Creating an encoding using `with_dictionary` ensures that the dictionary /// is in the correct order, and will allow values to be inserted with any /// value in the dictionary. @@ -61,22 +61,33 @@ impl Dictionary { } /// A reasonable estimation of the on-heap size this encoding takes up. - pub fn size(&self) -> usize { - // the total size of all decoded values in the column. - let decoded_keys_size = self + /// If `buffers` is true then all allocated buffers in the encoding are + /// accounted for. + pub fn size(&self, buffers: bool) -> usize { + let base_size = size_of::(); + + // Total size of all decoded values in the column. 
+ let mut decoded_keys_size = self .entries .iter() .map(|k| match k { - Some(v) => v.len(), + Some(v) => v.len(), None => 0, } + size_of::>()) .sum::(); - let entries_size = size_of::>>() + decoded_keys_size; - let encoded_ids_size = size_of::>() + (size_of::() * self.encoded_data.len()); + if buffers { + decoded_keys_size += + (self.entries.capacity() - self.entries.len()) * size_of::>(); + } - // + 1 for contains_null field - entries_size + encoded_ids_size + 1 + let encoded_ids_size = size_of::() + * match buffers { + true => self.encoded_data.capacity(), + false => self.encoded_data.len(), + }; + + base_size + decoded_keys_size + encoded_ids_size } /// A reasonable estimation of the on-heap size of the underlying string @@ -837,7 +848,7 @@ impl std::fmt::Display for Dictionary { f, "[{}] size: {:?} rows: {:?} cardinality: {}", ENCODING_NAME, - self.size(), + self.size(false), self.num_rows(), self.cardinality(), ) @@ -873,17 +884,13 @@ mod test { enc.push_none(); enc.push_none(); - // keys - 14 bytes. - - // 3 string entries in dictionary - // entries is 24 + (24*4) + 14 == 134 - + // Self - 24+24+8 = 56 bytes (two vectors, a bool and padding) + // 4 string entries (inc NULL) in vec = 4 * 24 = 96 + // 3 string entries with length 4+5+5 = 14 // 15 rows. 
- // encoded ids is 24 + (4 * 15) == 84 - - // 134 + 84 + 1 == 219 - - assert_eq!(enc.size(), 219); + // encoded ids is (4 * 15) == 60 + // 56 + 96 + 14 + 60 = 226 + assert_eq!(enc.size(false), 226); // check dictionary assert_eq!( @@ -899,6 +906,24 @@ mod test { enc.encoded_data, vec![1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 3, NULL_ID, NULL_ID, NULL_ID, NULL_ID] ); + + // check for allocated size + let mut enc = Dictionary::default(); + enc.encoded_data.reserve_exact(40); + enc.entries.reserve_exact(39); // account for already-allocated NULL element + enc.push_additional(Some("east".to_string()), 3); + enc.push_additional(Some("north".to_string()), 1); + enc.push_additional(Some("east".to_string()), 5); + enc.push_additional(Some("south".to_string()), 2); + enc.push_additional(None, 4); + + // Self - 24+24+8 = 56 bytes (two vectors, a bool and padding) + // 40 string entries (inc NULL) in vec = 40 * 24 = 960 + // 3 string entries with lengths 4+5+5 = 14 + // 15 rows but 40 elements allocated + // encoded ids is (40 * 4) == 160 + // 56 + 960 + 14 + 160 = 1190 + assert_eq!(enc.size(true), 1190); } #[test] diff --git a/read_buffer/src/column/string.rs b/read_buffer/src/column/string.rs index 87649b9cd5..709ccfe259 100644 --- a/read_buffer/src/column/string.rs +++ b/read_buffer/src/column/string.rs @@ -31,7 +31,7 @@ impl StringEncoding { pub fn size(&self) -> usize { match self { Self::RleDictionary(enc) => enc.size(), - Self::Dictionary(enc) => enc.size(), + Self::Dictionary(enc) => enc.size(false), } } From b4f8e854f63dad21ddbae286576b4a0679d331e5 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 12 Aug 2021 10:46:31 +0100 Subject: [PATCH 2/6] feat: size rle string encoding by allocated buffers --- read_buffer/src/chunk.rs | 2 +- read_buffer/src/column/encoding/string.rs | 2 +- read_buffer/src/column/encoding/string/rle.rs | 75 ++++++++++++------- read_buffer/src/column/string.rs | 2 +- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git 
a/read_buffer/src/chunk.rs b/read_buffer/src/chunk.rs index e3f2f22645..db3bf8bdf9 100644 --- a/read_buffer/src/chunk.rs +++ b/read_buffer/src/chunk.rs @@ -662,7 +662,7 @@ mod test { r#"read_buffer_column_bytes{db="mydb",encoding="FBT_U8-FIXEDN",log_data_type="f64"} 800"#, r#"read_buffer_column_bytes{db="mydb",encoding="FIXED",log_data_type="f64"} 96"#, r#"read_buffer_column_bytes{db="mydb",encoding="FIXEDN",log_data_type="bool"} 672"#, - r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 500"#, + r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 506"#, "# HELP read_buffer_column_raw_bytes The number of bytes used by all columns if they were uncompressed in the Read Buffer", "# TYPE read_buffer_column_raw_bytes gauge", r#"read_buffer_column_raw_bytes{db="mydb",encoding="BT_U32-FIXED",log_data_type="i64",null="false"} 96"#, diff --git a/read_buffer/src/column/encoding/string.rs b/read_buffer/src/column/encoding/string.rs index 42574e669e..c16fc40390 100644 --- a/read_buffer/src/column/encoding/string.rs +++ b/read_buffer/src/column/encoding/string.rs @@ -30,7 +30,7 @@ impl Encoding { pub fn size(&self) -> usize { match &self { - Self::RLE(enc) => enc.size(), + Self::RLE(enc) => enc.size(false), Self::Plain(enc) => enc.size(false), } } diff --git a/read_buffer/src/column/encoding/string/rle.rs b/read_buffer/src/column/encoding/string/rle.rs index edb471b551..9818a1203b 100644 --- a/read_buffer/src/column/encoding/string/rle.rs +++ b/read_buffer/src/column/encoding/string/rle.rs @@ -3,8 +3,6 @@ use std::convert::From; use std::iter; use std::mem::size_of; -use croaring::Bitmap; - use arrow::array::{Array, StringArray}; use super::NULL_ID; @@ -75,13 +73,18 @@ impl RLE { } /// A reasonable estimation of the on-heap size this encoding takes up. - pub fn size(&self) -> usize { - // the total size of all decoded values in the column. 
- let decoded_keys_size = self.index_entries.iter().map(|k| k.len()).sum::(); + /// If `buffers` is true then the size of all allocated buffers in the + /// encoding are accounted for. + pub fn size(&self, buffers: bool) -> usize { + let base_size = size_of::(); - let index_entry_size = size_of::>() // container size - + (size_of::() * self.index_entries.len()) // elements size - + decoded_keys_size; // heap allocated strings size + let mut index_entries_size = size_of::() + * match buffers { + true => self.index_entries.capacity(), + false => self.index_entries.len(), + }; + // the total size of all decoded values in the column. + index_entries_size += self.index_entries.iter().map(|k| k.len()).sum::(); // The total size (an upper bound estimate) of all the bitmaps // in the column. @@ -91,14 +94,16 @@ impl RLE { .map(|row_ids| row_ids.size()) .sum::(); - let index_row_ids_size = size_of::>() - + (size_of::() * self.index_row_ids.len()) - + row_ids_bitmaps_size; + let index_row_ids_size = + (size_of::() * self.index_row_ids.len()) + row_ids_bitmaps_size; - let run_lengths_size = size_of::>() + // container size - (size_of::<(u32, u32)>() * self.run_lengths.len()); // each run-length size + let run_lengths_size = size_of::<(u32, u32)>() + * match buffers { + true => self.run_lengths.capacity(), + false => self.run_lengths.len(), + }; - index_entry_size + index_row_ids_size + run_lengths_size + 1 + 4 + base_size + index_entries_size + index_row_ids_size + run_lengths_size } /// A reasonable estimation of the on-heap size of the underlying string @@ -958,7 +963,7 @@ impl std::fmt::Display for RLE { f, "[{}] size: {:?} rows: {:?} cardinality: {}, nulls: {} runs: {} ", ENCODING_NAME, - self.size(), + self.size(false), self.num_rows, self.cardinality(), self.null_count(), @@ -1000,22 +1005,34 @@ mod test { enc.push_none(); enc.push_none(); - // Note: there are 4 index entries to account for NULL entry. 
- // `index_entry` is 24 + (24*4) + 14 == 134 + // * Self: 24 + 24 + 24 + 1 + (padding 3b) + 4 = 80b + // * index entries: (4) are is (24*4) + 14 == 110 + // * index row ids: (bitmaps) is (4 * 4) + (108b for bitmaps) == 124 + // * run lengths: (8*5) == 40 // - // bitmaps for east, north, south and NULL entries. - // `index_row_ids` is 24 + (4 * 4) + (108b for bitmaps) == 148 - // - // `run lengths` is 24 + (8*5) == 64 - // - // `contains_null` - 1 byte - // `num_rows` - 4 bytes - // - // 351 + // 354 + // assert_eq!(enc.size(false), 354); - // TODO(edd): there some mystery bytes in the bitmap implementation. - // need to figure out how to measure these - assert_eq!(enc.size(), 351); + // check allocated size + let mut enc = RLE::default(); + enc.index_entries.reserve_exact(39); // account for already-allocated NULL element + enc.run_lengths.reserve_exact(40); + + enc.push_additional(Some("east".to_string()), 3); + enc.push_additional(Some("north".to_string()), 1); + enc.push_additional(Some("east".to_string()), 5); + enc.push_additional(Some("south".to_string()), 2); + enc.push_none(); + enc.push_none(); + enc.push_none(); + enc.push_none(); + + // * Self: 24 + 24 + 24 + 1 + (padding 3b) + 4 = 80b + // * index entries: (40 * 24) + 14 == 974 + // * index row ids: (bitmaps) is (4 * 4) + (108b for bitmaps) == 124 + // * run lengths: (40 * 8) == 320 + // + assert_eq!(enc.size(true), 1498); } #[test] diff --git a/read_buffer/src/column/string.rs b/read_buffer/src/column/string.rs index 709ccfe259..6987e9d538 100644 --- a/read_buffer/src/column/string.rs +++ b/read_buffer/src/column/string.rs @@ -30,7 +30,7 @@ impl StringEncoding { /// The estimated total size in bytes of the in-memory columnar data. 
pub fn size(&self) -> usize { match self { - Self::RleDictionary(enc) => enc.size(), + Self::RleDictionary(enc) => enc.size(false), Self::Dictionary(enc) => enc.size(false), } } From 11349fa30dce66f6c7b10c8fa4ef72b004d659df Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 12 Aug 2021 10:14:57 +0100 Subject: [PATCH 3/6] feat: add allocated size to bool --- read_buffer/src/column/boolean.rs | 2 +- read_buffer/src/column/encoding/bool.rs | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/read_buffer/src/column/boolean.rs b/read_buffer/src/column/boolean.rs index c568022fc1..a356b18622 100644 --- a/read_buffer/src/column/boolean.rs +++ b/read_buffer/src/column/boolean.rs @@ -11,7 +11,7 @@ impl BooleanEncoding { /// The total size in bytes of the store columnar data. pub fn size(&self) -> usize { match self { - Self::BooleanNull(enc) => enc.size(), + Self::BooleanNull(enc) => enc.size(false), } } diff --git a/read_buffer/src/column/encoding/bool.rs b/read_buffer/src/column/encoding/bool.rs index bd844633d9..5dd6891a2a 100644 --- a/read_buffer/src/column/encoding/bool.rs +++ b/read_buffer/src/column/encoding/bool.rs @@ -1,6 +1,7 @@ //! An encoding nullable bool, by an Arrow array. use std::cmp::Ordering; use std::fmt::Debug; +use std::mem::size_of; use arrow::array::{Array, BooleanArray}; use cmp::Operator; @@ -19,7 +20,7 @@ impl std::fmt::Display for Bool { "[Bool] rows: {:?}, nulls: {:?}, size: {}", self.arr.len(), self.arr.null_count(), - self.size() + self.size(false) ) } } @@ -42,8 +43,12 @@ impl Bool { /// Returns an estimation of the total size in bytes used by this column /// encoding. 
- pub fn size(&self) -> usize { - std::mem::size_of::() + self.arr.get_array_memory_size() + pub fn size(&self, buffers: bool) -> usize { + size_of::() + + match buffers { + true => self.arr.get_array_memory_size(), // includes buffer capacities + false => self.arr.get_buffer_memory_size(), + } } /// The estimated total size in bytes of the underlying bool values in the @@ -360,7 +365,8 @@ mod test { #[test] fn size() { let v = Bool::from(vec![None, None, Some(true), Some(false)].as_slice()); - assert_eq!(v.size(), 400); + assert_eq!(v.size(false), 256); + assert_eq!(v.size(true), 400); // includes allocated buffers } #[test] From 0e8b0edfc9e4fd432a45c425826997d87a29e777 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 12 Aug 2021 10:43:21 +0100 Subject: [PATCH 4/6] feat: add buffer-based sizing for numerical encodings --- read_buffer/src/chunk.rs | 4 +-- read_buffer/src/column/encoding/scalar.rs | 6 ++-- .../src/column/encoding/scalar/fixed.rs | 25 +++++++++++--- .../src/column/encoding/scalar/fixed_null.rs | 13 +++++--- read_buffer/src/column/encoding/scalar/rle.rs | 33 ++++++++++++++----- read_buffer/src/column/float.rs | 2 +- read_buffer/src/column/integer.rs | 26 +++++++-------- 7 files changed, 74 insertions(+), 35 deletions(-) diff --git a/read_buffer/src/chunk.rs b/read_buffer/src/chunk.rs index db3bf8bdf9..fcc4638cdd 100644 --- a/read_buffer/src/chunk.rs +++ b/read_buffer/src/chunk.rs @@ -659,9 +659,9 @@ mod test { "# HELP read_buffer_column_bytes The number of bytes used by all columns in the Read Buffer", "# TYPE read_buffer_column_bytes gauge", r#"read_buffer_column_bytes{db="mydb",encoding="BT_U32-FIXED",log_data_type="i64"} 72"#, - r#"read_buffer_column_bytes{db="mydb",encoding="FBT_U8-FIXEDN",log_data_type="f64"} 800"#, + r#"read_buffer_column_bytes{db="mydb",encoding="FBT_U8-FIXEDN",log_data_type="f64"} 512"#, r#"read_buffer_column_bytes{db="mydb",encoding="FIXED",log_data_type="f64"} 96"#, - 
r#"read_buffer_column_bytes{db="mydb",encoding="FIXEDN",log_data_type="bool"} 672"#, + r#"read_buffer_column_bytes{db="mydb",encoding="FIXEDN",log_data_type="bool"} 384"#, r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 506"#, "# HELP read_buffer_column_raw_bytes The number of bytes used by all columns if they were uncompressed in the Read Buffer", "# TYPE read_buffer_column_raw_bytes gauge", diff --git a/read_buffer/src/column/encoding/scalar.rs b/read_buffer/src/column/encoding/scalar.rs index 282c3a6ec0..8789d8fdbb 100644 --- a/read_buffer/src/column/encoding/scalar.rs +++ b/read_buffer/src/column/encoding/scalar.rs @@ -18,8 +18,10 @@ pub trait ScalarEncoding: Debug + Display + Send + Sync { /// A useful name for the encoding, likely used in instrumentation. fn name(&self) -> &'static str; - /// The total size in bytes to store encoded data in memory. - fn size(&self) -> usize; + /// The total size in bytes to store encoded data in memory. If `buffers` + /// is true then the returned size should account for any allocated buffers + /// within the contained encoding structures. + fn size(&self, buffers: bool) -> usize; /// The estimated total size in bytes of the underlying encoded values if /// they were stored contiguously as a vector of `L`. `include_null` should diff --git a/read_buffer/src/column/encoding/scalar/fixed.rs b/read_buffer/src/column/encoding/scalar/fixed.rs index edb9013c71..d6689e97d9 100644 --- a/read_buffer/src/column/encoding/scalar/fixed.rs +++ b/read_buffer/src/column/encoding/scalar/fixed.rs @@ -53,7 +53,7 @@ where "[{}] rows: {:?}, size: {}", self.name(), self.num_rows(), - self.size() + self.size(false) ) } } @@ -252,9 +252,13 @@ where self.values.len() as u32 } - /// Encoded data size including `Self` - an "accurate" estimation. - fn size(&self) -> usize { - size_of::() + (size_of::
<P>
() * self.values.len()) + fn size(&self, buffers: bool) -> usize { + let values = size_of::
<P>
() + * match buffers { + true => self.values.capacity(), + false => self.values.len(), + }; + size_of::() + values } fn size_raw(&self, _: bool) -> usize { @@ -425,6 +429,19 @@ mod test { (Fixed::new(values, Arc::clone(&mock)), mock) } + #[test] + fn size() { + let (v, _) = new_encoding(vec![22_i64, 1, 18]); + // Self if 32 bytes and there are 3 * 8b values + assert_eq!(v.size(false), 56); + + // check pre-allocated sizing + let (mut v, _) = new_encoding(vec![]); + v.values.reserve_exact(40); + // Self if 32 bytes and there are 40 * 8b values allocated + assert_eq!(v.size(true), 352); + } + #[test] fn value() { let (v, transcoder) = new_encoding(vec![22, 1, 18]); diff --git a/read_buffer/src/column/encoding/scalar/fixed_null.rs b/read_buffer/src/column/encoding/scalar/fixed_null.rs index 2179e8f8f4..922f597a77 100644 --- a/read_buffer/src/column/encoding/scalar/fixed_null.rs +++ b/read_buffer/src/column/encoding/scalar/fixed_null.rs @@ -52,7 +52,7 @@ where self.name(), self.arr.len(), self.arr.null_count(), - self.size() + self.size(false) ) } } @@ -260,8 +260,12 @@ where self.arr.null_count() as u32 } - fn size(&self) -> usize { - size_of::() + self.arr.get_array_memory_size() + fn size(&self, buffers: bool) -> usize { + size_of::() + + match buffers { + true => self.arr.get_array_memory_size(), + false => self.arr.get_buffer_memory_size(), + } } /// The estimated total size in bytes of the underlying values in the @@ -478,7 +482,8 @@ mod test { #[test] fn size() { let (v, _) = new_encoding(vec![None, None, Some(100), Some(2222)]); - assert_eq!(v.size(), 408); + assert_eq!(v.size(false), 264); + assert_eq!(v.size(true), 408); // includes allocated buffers } #[test] diff --git a/read_buffer/src/column/encoding/scalar/rle.rs b/read_buffer/src/column/encoding/scalar/rle.rs index 2f6d8c2fab..fc8c1b3bc2 100644 --- a/read_buffer/src/column/encoding/scalar/rle.rs +++ b/read_buffer/src/column/encoding/scalar/rle.rs @@ -70,7 +70,7 @@ where f, "[{}] size: {:?} rows: {:?} 
nulls: {} runs: {} ", self.name(), - self.size(), + self.size(false), self.num_rows(), self.null_count(), self.run_lengths.len() @@ -343,8 +343,13 @@ where ENCODING_NAME } - fn size(&self) -> usize { - std::mem::size_of::() + (self.run_lengths.len() * size_of::<(u32, Option
<P>
)>()) + fn size(&self, buffers: bool) -> usize { + let values = size_of::<(u32, Option
<P>
)>() + * match buffers { + true => self.run_lengths.capacity(), + false => self.run_lengths.len(), + }; + std::mem::size_of::() + values } fn size_raw(&self, include_nulls: bool) -> usize { @@ -713,16 +718,26 @@ mod test { fn size() { let (mut enc, _) = new_encoding(vec![]); - // 40b Self + (0 rl * 24) = 32 - assert_eq!(enc.size(), 40); + // 40b Self + (0 rl * 24) = 40 + assert_eq!(enc.size(false), 40); enc.push_none(); - // 40b Self + (1 rl * 24) = 56 - assert_eq!(enc.size(), 64); + // 40b Self + (1 rl * 24) = 64 + assert_eq!(enc.size(false), 64); enc.push_additional_some(1, 10); - // 40b Self + (2 rl * 24) = 80 - assert_eq!(enc.size(), 88); + // 40b Self + (2 rl * 24) = 88 + assert_eq!(enc.size(false), 88); + + // check allocated buffer size + let (mut enc, _) = new_encoding(vec![]); + enc.run_lengths.reserve_exact(40); + // 40b Self + (40 rl * 24) = 1000b + assert_eq!(enc.size(true), 1000); + + // 40b Self + (40 rl * 24) = 1000b - no new allocations + enc.push_additional_some(1, 10); + assert_eq!(enc.size(true), 1000); } #[test] diff --git a/read_buffer/src/column/float.rs b/read_buffer/src/column/float.rs index 16ac473055..5f08e6fcd7 100644 --- a/read_buffer/src/column/float.rs +++ b/read_buffer/src/column/float.rs @@ -32,7 +32,7 @@ impl FloatEncoding { /// The total size in bytes of to store columnar data in memory. pub fn size(&self) -> usize { match self { - Self::F64(enc, _) => enc.size(), + Self::F64(enc, _) => enc.size(false), } } diff --git a/read_buffer/src/column/integer.rs b/read_buffer/src/column/integer.rs index 2118edf85a..3cd2dcd04d 100644 --- a/read_buffer/src/column/integer.rs +++ b/read_buffer/src/column/integer.rs @@ -27,8 +27,8 @@ impl IntegerEncoding { /// The total size in bytes of the store columnar data. 
pub fn size(&self) -> usize { match self { - Self::I64(enc, _) => enc.size(), - Self::U64(enc, _) => enc.size(), + Self::I64(enc, _) => enc.size(false), + Self::U64(enc, _) => enc.size(false), } } @@ -971,13 +971,13 @@ mod test { // Input data containing NULL will be stored in an Arrow array encoding let cases = vec![ - (vec![None, Some(0_i64)], 400_usize), // u8 Arrow array - (vec![None, Some(-120_i64)], 400), // i8 - (vec![None, Some(399_i64)], 400), // u16 - (vec![None, Some(-399_i64)], 400), // i16 - (vec![None, Some(u32::MAX as i64)], 400), // u32 - (vec![None, Some(i32::MIN as i64)], 400), // i32 - (vec![None, Some(u32::MAX as i64 + 1)], 400), //u64 + (vec![None, Some(0_i64)], 256_usize), // u8 Arrow array + (vec![None, Some(-120_i64)], 256), // i8 + (vec![None, Some(399_i64)], 256), // u16 + (vec![None, Some(-399_i64)], 256), // i16 + (vec![None, Some(u32::MAX as i64)], 256), // u32 + (vec![None, Some(i32::MIN as i64)], 256), // i32 + (vec![None, Some(u32::MAX as i64 + 1)], 256), //u64 ]; for (case, name) in cases.iter().cloned() { @@ -1163,10 +1163,10 @@ mod test { // Input data containing NULL will be stored in an Arrow array encoding let cases = vec![ - (vec![None, Some(0_u64)], 400_usize), - (vec![None, Some(399_u64)], 400), - (vec![None, Some(u32::MAX as u64)], 400), - (vec![None, Some(u64::MAX)], 400), + (vec![None, Some(0_u64)], 256_usize), + (vec![None, Some(399_u64)], 256), + (vec![None, Some(u32::MAX as u64)], 256), + (vec![None, Some(u64::MAX)], 256), ]; for (case, size) in cases.iter().cloned() { From c68bbb630940da08a15f711be3189f972ae3862f Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 12 Aug 2021 11:21:28 +0100 Subject: [PATCH 5/6] test: update test --- query_tests/src/sql.rs | 8 ++++---- server/src/db.rs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/query_tests/src/sql.rs b/query_tests/src/sql.rs index 1ba61c3b05..e554ba3681 100644 --- a/query_tests/src/sql.rs +++ b/query_tests/src/sql.rs @@ -369,10 +369,10 
@@ async fn sql_select_from_system_chunk_columns() { "+---------------+----------+------------+-------------+-------------------+-----------+------------+-----------+-----------+--------------+", "| partition_key | chunk_id | table_name | column_name | storage | row_count | null_count | min_value | max_value | memory_bytes |", "+---------------+----------+------------+-------------+-------------------+-----------+------------+-----------+-----------+--------------+", - "| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | 0 | Boston | Boston | 252 |", - "| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 425 |", - "| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | 0 | MA | MA | 240 |", - "| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 425 |", + "| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | 0 | Boston | Boston | 255 |", + "| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 281 |", + "| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | 0 | MA | MA | 243 |", + "| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 281 |", "| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 0 | 50 | 250 | 51 |", "| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 2 | 1 | Boston | Boston | 35 |", "| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 2 | 1 | 51 | 51 | 25 |", diff --git a/server/src/db.rs b/server/src/db.rs index ae79f37ff9..337d36a0ca 100644 --- a/server/src/db.rs +++ b/server/src/db.rs @@ -2531,7 +2531,7 @@ mod tests { ("svr_id", "1"), ]) .histogram() - .sample_sum_eq(3191.0) + .sample_sum_eq(3197.0) .unwrap(); let rb = collect_read_filter(&rb_chunk).await; @@ -3400,7 +3400,7 @@ mod tests { id: 2, storage: ChunkStorage::ReadBufferAndObjectStore, lifecycle_action, - memory_bytes: 3284, // size of RB and OS chunks + memory_bytes: 3140, // size of RB and OS chunks object_store_bytes: 1577, // size of parquet file row_count: 2, time_of_last_access: None, @@ -3451,7 
+3451,7 @@ mod tests { } assert_eq!(db.catalog.metrics().memory().mutable_buffer(), 2486 + 87); - assert_eq!(db.catalog.metrics().memory().read_buffer(), 2410); + assert_eq!(db.catalog.metrics().memory().read_buffer(), 2266); assert_eq!(db.catalog.metrics().memory().object_store(), 874); } From e78aebdf1969d6431831c74ac3e071f3c702d2ff Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 12 Aug 2021 15:57:01 +0100 Subject: [PATCH 6/6] refactor: update read_buffer/src/column/encoding/scalar/fixed.rs Co-authored-by: Andrew Lamb --- read_buffer/src/column/encoding/scalar/fixed.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read_buffer/src/column/encoding/scalar/fixed.rs b/read_buffer/src/column/encoding/scalar/fixed.rs index d6689e97d9..6cf6b0b63e 100644 --- a/read_buffer/src/column/encoding/scalar/fixed.rs +++ b/read_buffer/src/column/encoding/scalar/fixed.rs @@ -432,7 +432,7 @@ mod test { #[test] fn size() { let (v, _) = new_encoding(vec![22_i64, 1, 18]); - // Self if 32 bytes and there are 3 * 8b values + // Self is 32 bytes and there are 3 * 8b values assert_eq!(v.size(false), 56); // check pre-allocated sizing