Merge pull request #2267 from influxdata/er/read_buffer/cap
feat: Add API to encodings to get allocated buffer sizespull/24376/head
commit
ac28f83cf0
|
@ -369,10 +369,10 @@ async fn sql_select_from_system_chunk_columns() {
|
|||
"+---------------+----------+------------+-------------+-------------------+-----------+------------+-----------+-----------+--------------+",
|
||||
"| partition_key | chunk_id | table_name | column_name | storage | row_count | null_count | min_value | max_value | memory_bytes |",
|
||||
"+---------------+----------+------------+-------------+-------------------+-----------+------------+-----------+-----------+--------------+",
|
||||
"| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | 0 | Boston | Boston | 252 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 425 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | 0 | MA | MA | 240 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 425 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | city | ReadBuffer | 2 | 0 | Boston | Boston | 255 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | other_temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 281 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | state | ReadBuffer | 2 | 0 | MA | MA | 243 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | temp | ReadBuffer | 2 | 1 | 70.4 | 70.4 | 281 |",
|
||||
"| 1970-01-01T00 | 0 | h2o | time | ReadBuffer | 2 | 0 | 50 | 250 | 51 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | city | OpenMutableBuffer | 2 | 1 | Boston | Boston | 35 |",
|
||||
"| 1970-01-01T00 | 0 | o2 | reading | OpenMutableBuffer | 2 | 1 | 51 | 51 | 25 |",
|
||||
|
|
|
@ -659,10 +659,10 @@ mod test {
|
|||
"# HELP read_buffer_column_bytes The number of bytes used by all columns in the Read Buffer",
|
||||
"# TYPE read_buffer_column_bytes gauge",
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="BT_U32-FIXED",log_data_type="i64"} 72"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="FBT_U8-FIXEDN",log_data_type="f64"} 800"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="FBT_U8-FIXEDN",log_data_type="f64"} 512"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="FIXED",log_data_type="f64"} 96"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="FIXEDN",log_data_type="bool"} 672"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 500"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="FIXEDN",log_data_type="bool"} 384"#,
|
||||
r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 506"#,
|
||||
"# HELP read_buffer_column_raw_bytes The number of bytes used by all columns if they were uncompressed in the Read Buffer",
|
||||
"# TYPE read_buffer_column_raw_bytes gauge",
|
||||
r#"read_buffer_column_raw_bytes{db="mydb",encoding="BT_U32-FIXED",log_data_type="i64",null="false"} 96"#,
|
||||
|
|
|
@ -11,7 +11,7 @@ impl BooleanEncoding {
|
|||
/// The total size in bytes of the store columnar data.
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
Self::BooleanNull(enc) => enc.size(),
|
||||
Self::BooleanNull(enc) => enc.size(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
//! An encoding nullable bool, by an Arrow array.
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt::Debug;
|
||||
use std::mem::size_of;
|
||||
|
||||
use arrow::array::{Array, BooleanArray};
|
||||
use cmp::Operator;
|
||||
|
@ -19,7 +20,7 @@ impl std::fmt::Display for Bool {
|
|||
"[Bool] rows: {:?}, nulls: {:?}, size: {}",
|
||||
self.arr.len(),
|
||||
self.arr.null_count(),
|
||||
self.size()
|
||||
self.size(false)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -42,8 +43,12 @@ impl Bool {
|
|||
|
||||
/// Returns an estimation of the total size in bytes used by this column
|
||||
/// encoding.
|
||||
pub fn size(&self) -> usize {
|
||||
std::mem::size_of::<BooleanArray>() + self.arr.get_array_memory_size()
|
||||
pub fn size(&self, buffers: bool) -> usize {
|
||||
size_of::<Self>()
|
||||
+ match buffers {
|
||||
true => self.arr.get_array_memory_size(), // includes buffer capacities
|
||||
false => self.arr.get_buffer_memory_size(),
|
||||
}
|
||||
}
|
||||
|
||||
/// The estimated total size in bytes of the underlying bool values in the
|
||||
|
@ -360,7 +365,8 @@ mod test {
|
|||
#[test]
|
||||
fn size() {
|
||||
let v = Bool::from(vec![None, None, Some(true), Some(false)].as_slice());
|
||||
assert_eq!(v.size(), 400);
|
||||
assert_eq!(v.size(false), 256);
|
||||
assert_eq!(v.size(true), 400); // includes allocated buffers
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -18,8 +18,10 @@ pub trait ScalarEncoding<L>: Debug + Display + Send + Sync {
|
|||
/// A useful name for the encoding, likely used in instrumentation.
|
||||
fn name(&self) -> &'static str;
|
||||
|
||||
/// The total size in bytes to store encoded data in memory.
|
||||
fn size(&self) -> usize;
|
||||
/// The total size in bytes to store encoded data in memory. If `buffers`
|
||||
/// is true then the returned size should account for any allocated buffers
|
||||
/// within the contained encoding structures.
|
||||
fn size(&self, buffers: bool) -> usize;
|
||||
|
||||
/// The estimated total size in bytes of the underlying encoded values if
|
||||
/// they were stored contiguously as a vector of `L`. `include_null` should
|
||||
|
|
|
@ -53,7 +53,7 @@ where
|
|||
"[{}] rows: {:?}, size: {}",
|
||||
self.name(),
|
||||
self.num_rows(),
|
||||
self.size()
|
||||
self.size(false)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -252,9 +252,13 @@ where
|
|||
self.values.len() as u32
|
||||
}
|
||||
|
||||
/// Encoded data size including `Self` - an "accurate" estimation.
|
||||
fn size(&self) -> usize {
|
||||
size_of::<Self>() + (size_of::<P>() * self.values.len())
|
||||
fn size(&self, buffers: bool) -> usize {
|
||||
let values = size_of::<P>()
|
||||
* match buffers {
|
||||
true => self.values.capacity(),
|
||||
false => self.values.len(),
|
||||
};
|
||||
size_of::<Self>() + values
|
||||
}
|
||||
|
||||
fn size_raw(&self, _: bool) -> usize {
|
||||
|
@ -425,6 +429,19 @@ mod test {
|
|||
(Fixed::new(values, Arc::clone(&mock)), mock)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn size() {
|
||||
let (v, _) = new_encoding(vec![22_i64, 1, 18]);
|
||||
// Self is 32 bytes and there are 3 * 8b values
|
||||
assert_eq!(v.size(false), 56);
|
||||
|
||||
// check pre-allocated sizing
|
||||
let (mut v, _) = new_encoding(vec![]);
|
||||
v.values.reserve_exact(40);
|
||||
// Self if 32 bytes and there are 40 * 8b values allocated
|
||||
assert_eq!(v.size(true), 352);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn value() {
|
||||
let (v, transcoder) = new_encoding(vec![22, 1, 18]);
|
||||
|
|
|
@ -52,7 +52,7 @@ where
|
|||
self.name(),
|
||||
self.arr.len(),
|
||||
self.arr.null_count(),
|
||||
self.size()
|
||||
self.size(false)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -260,8 +260,12 @@ where
|
|||
self.arr.null_count() as u32
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
size_of::<Self>() + self.arr.get_array_memory_size()
|
||||
fn size(&self, buffers: bool) -> usize {
|
||||
size_of::<Self>()
|
||||
+ match buffers {
|
||||
true => self.arr.get_array_memory_size(),
|
||||
false => self.arr.get_buffer_memory_size(),
|
||||
}
|
||||
}
|
||||
|
||||
/// The estimated total size in bytes of the underlying values in the
|
||||
|
@ -478,7 +482,8 @@ mod test {
|
|||
#[test]
|
||||
fn size() {
|
||||
let (v, _) = new_encoding(vec![None, None, Some(100), Some(2222)]);
|
||||
assert_eq!(v.size(), 408);
|
||||
assert_eq!(v.size(false), 264);
|
||||
assert_eq!(v.size(true), 408); // includes allocated buffers
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -70,7 +70,7 @@ where
|
|||
f,
|
||||
"[{}] size: {:?} rows: {:?} nulls: {} runs: {} ",
|
||||
self.name(),
|
||||
self.size(),
|
||||
self.size(false),
|
||||
self.num_rows(),
|
||||
self.null_count(),
|
||||
self.run_lengths.len()
|
||||
|
@ -343,8 +343,13 @@ where
|
|||
ENCODING_NAME
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
std::mem::size_of::<Self>() + (self.run_lengths.len() * size_of::<(u32, Option<P>)>())
|
||||
fn size(&self, buffers: bool) -> usize {
|
||||
let values = size_of::<(u32, Option<P>)>()
|
||||
* match buffers {
|
||||
true => self.run_lengths.capacity(),
|
||||
false => self.run_lengths.len(),
|
||||
};
|
||||
std::mem::size_of::<Self>() + values
|
||||
}
|
||||
|
||||
fn size_raw(&self, include_nulls: bool) -> usize {
|
||||
|
@ -713,16 +718,26 @@ mod test {
|
|||
fn size() {
|
||||
let (mut enc, _) = new_encoding(vec![]);
|
||||
|
||||
// 40b Self + (0 rl * 24) = 32
|
||||
assert_eq!(enc.size(), 40);
|
||||
// 40b Self + (0 rl * 24) = 40
|
||||
assert_eq!(enc.size(false), 40);
|
||||
|
||||
enc.push_none();
|
||||
// 40b Self + (1 rl * 24) = 56
|
||||
assert_eq!(enc.size(), 64);
|
||||
// 40b Self + (1 rl * 24) = 64
|
||||
assert_eq!(enc.size(false), 64);
|
||||
|
||||
enc.push_additional_some(1, 10);
|
||||
// 40b Self + (2 rl * 24) = 80
|
||||
assert_eq!(enc.size(), 88);
|
||||
// 40b Self + (2 rl * 24) = 88
|
||||
assert_eq!(enc.size(false), 88);
|
||||
|
||||
// check allocated buffer size
|
||||
let (mut enc, _) = new_encoding(vec![]);
|
||||
enc.run_lengths.reserve_exact(40);
|
||||
// 40b Self + (40 rl * 24) = 1000b
|
||||
assert_eq!(enc.size(true), 1000);
|
||||
|
||||
// 40b Self + (40 rl * 24) = 1000b - no new allocations
|
||||
enc.push_additional_some(1, 10);
|
||||
assert_eq!(enc.size(true), 1000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -30,8 +30,8 @@ impl Encoding {
|
|||
|
||||
pub fn size(&self) -> usize {
|
||||
match &self {
|
||||
Self::RLE(enc) => enc.size(),
|
||||
Self::Plain(enc) => enc.size(),
|
||||
Self::RLE(enc) => enc.size(false),
|
||||
Self::Plain(enc) => enc.size(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ impl Default for Dictionary {
|
|||
}
|
||||
|
||||
impl Dictionary {
|
||||
/// Initialises an Dictionar encoding with a set of logical values.
|
||||
/// Initialises a Dictionary encoding with a set of logical values.
|
||||
/// Creating an encoding using `with_dictionary` ensures that the dictionary
|
||||
/// is in the correct order, and will allow values to be inserted with any
|
||||
/// value in the dictionary.
|
||||
|
@ -61,22 +61,33 @@ impl Dictionary {
|
|||
}
|
||||
|
||||
/// A reasonable estimation of the on-heap size this encoding takes up.
|
||||
pub fn size(&self) -> usize {
|
||||
// the total size of all decoded values in the column.
|
||||
let decoded_keys_size = self
|
||||
/// If `buffers` is true then all allocated buffers in the encoding are
|
||||
/// accounted for.
|
||||
pub fn size(&self, buffers: bool) -> usize {
|
||||
let base_size = size_of::<Self>();
|
||||
|
||||
// Total size of all decoded values in the column.
|
||||
let mut decoded_keys_size = self
|
||||
.entries
|
||||
.iter()
|
||||
.map(|k| match k {
|
||||
Some(v) => v.len(),
|
||||
Some(v) => v.len(),
|
||||
None => 0,
|
||||
} + size_of::<Option<String>>())
|
||||
.sum::<usize>();
|
||||
|
||||
let entries_size = size_of::<Vec<Option<String>>>() + decoded_keys_size;
|
||||
let encoded_ids_size = size_of::<Vec<u32>>() + (size_of::<u32>() * self.encoded_data.len());
|
||||
if buffers {
|
||||
decoded_keys_size +=
|
||||
(self.entries.capacity() - self.entries.len()) * size_of::<Option<String>>();
|
||||
}
|
||||
|
||||
// + 1 for contains_null field
|
||||
entries_size + encoded_ids_size + 1
|
||||
let encoded_ids_size = size_of::<u32>()
|
||||
* match buffers {
|
||||
true => self.encoded_data.capacity(),
|
||||
false => self.encoded_data.len(),
|
||||
};
|
||||
|
||||
base_size + decoded_keys_size + encoded_ids_size
|
||||
}
|
||||
|
||||
/// A reasonable estimation of the on-heap size of the underlying string
|
||||
|
@ -837,7 +848,7 @@ impl std::fmt::Display for Dictionary {
|
|||
f,
|
||||
"[{}] size: {:?} rows: {:?} cardinality: {}",
|
||||
ENCODING_NAME,
|
||||
self.size(),
|
||||
self.size(false),
|
||||
self.num_rows(),
|
||||
self.cardinality(),
|
||||
)
|
||||
|
@ -873,17 +884,13 @@ mod test {
|
|||
enc.push_none();
|
||||
enc.push_none();
|
||||
|
||||
// keys - 14 bytes.
|
||||
|
||||
// 3 string entries in dictionary
|
||||
// entries is 24 + (24*4) + 14 == 134
|
||||
|
||||
// Self - 24+24+8 = 56 bytes (two vectors, a bool and padding)
|
||||
// 4 string entries (inc NULL) in vec = 4 * 24 = 96
|
||||
// 3 string entries with length 4+5+5 = 14
|
||||
// 15 rows.
|
||||
// encoded ids is 24 + (4 * 15) == 84
|
||||
|
||||
// 134 + 84 + 1 == 219
|
||||
|
||||
assert_eq!(enc.size(), 219);
|
||||
// encoded ids is (4 * 15) == 60
|
||||
// 56 + 96 + 14 + 60 = 226
|
||||
assert_eq!(enc.size(false), 226);
|
||||
|
||||
// check dictionary
|
||||
assert_eq!(
|
||||
|
@ -899,6 +906,24 @@ mod test {
|
|||
enc.encoded_data,
|
||||
vec![1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 3, NULL_ID, NULL_ID, NULL_ID, NULL_ID]
|
||||
);
|
||||
|
||||
// check for allocated size
|
||||
let mut enc = Dictionary::default();
|
||||
enc.encoded_data.reserve_exact(40);
|
||||
enc.entries.reserve_exact(39); // account for already-allocated NULL element
|
||||
enc.push_additional(Some("east".to_string()), 3);
|
||||
enc.push_additional(Some("north".to_string()), 1);
|
||||
enc.push_additional(Some("east".to_string()), 5);
|
||||
enc.push_additional(Some("south".to_string()), 2);
|
||||
enc.push_additional(None, 4);
|
||||
|
||||
// Self - 24+24+8 = 56 bytes (two vectors, a bool and padding)
|
||||
// 40 string entries (inc NULL) in vec = 40 * 24 = 960
|
||||
// 3 string entries with lengths 4+5+5 = 14
|
||||
// 15 rows but 40 elements allocated
|
||||
// encoded ids is (40 * 4) == 160
|
||||
// 56 + 960 + 14 + 160 = 1190
|
||||
assert_eq!(enc.size(true), 1190);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -3,8 +3,6 @@ use std::convert::From;
|
|||
use std::iter;
|
||||
use std::mem::size_of;
|
||||
|
||||
use croaring::Bitmap;
|
||||
|
||||
use arrow::array::{Array, StringArray};
|
||||
|
||||
use super::NULL_ID;
|
||||
|
@ -75,13 +73,18 @@ impl RLE {
|
|||
}
|
||||
|
||||
/// A reasonable estimation of the on-heap size this encoding takes up.
|
||||
pub fn size(&self) -> usize {
|
||||
// the total size of all decoded values in the column.
|
||||
let decoded_keys_size = self.index_entries.iter().map(|k| k.len()).sum::<usize>();
|
||||
/// If `buffers` is true then the size of all allocated buffers in the
|
||||
/// encoding are accounted for.
|
||||
pub fn size(&self, buffers: bool) -> usize {
|
||||
let base_size = size_of::<Self>();
|
||||
|
||||
let index_entry_size = size_of::<Vec<String>>() // container size
|
||||
+ (size_of::<String>() * self.index_entries.len()) // elements size
|
||||
+ decoded_keys_size; // heap allocated strings size
|
||||
let mut index_entries_size = size_of::<String>()
|
||||
* match buffers {
|
||||
true => self.index_entries.capacity(),
|
||||
false => self.index_entries.len(),
|
||||
};
|
||||
// the total size of all decoded values in the column.
|
||||
index_entries_size += self.index_entries.iter().map(|k| k.len()).sum::<usize>();
|
||||
|
||||
// The total size (an upper bound estimate) of all the bitmaps
|
||||
// in the column.
|
||||
|
@ -91,14 +94,16 @@ impl RLE {
|
|||
.map(|row_ids| row_ids.size())
|
||||
.sum::<usize>();
|
||||
|
||||
let index_row_ids_size = size_of::<BTreeMap<u32, Bitmap>>()
|
||||
+ (size_of::<u32>() * self.index_row_ids.len())
|
||||
+ row_ids_bitmaps_size;
|
||||
let index_row_ids_size =
|
||||
(size_of::<u32>() * self.index_row_ids.len()) + row_ids_bitmaps_size;
|
||||
|
||||
let run_lengths_size = size_of::<Vec<(u32, u32)>>() + // container size
|
||||
(size_of::<(u32, u32)>() * self.run_lengths.len()); // each run-length size
|
||||
let run_lengths_size = size_of::<(u32, u32)>()
|
||||
* match buffers {
|
||||
true => self.run_lengths.capacity(),
|
||||
false => self.run_lengths.len(),
|
||||
};
|
||||
|
||||
index_entry_size + index_row_ids_size + run_lengths_size + 1 + 4
|
||||
base_size + index_entries_size + index_row_ids_size + run_lengths_size
|
||||
}
|
||||
|
||||
/// A reasonable estimation of the on-heap size of the underlying string
|
||||
|
@ -958,7 +963,7 @@ impl std::fmt::Display for RLE {
|
|||
f,
|
||||
"[{}] size: {:?} rows: {:?} cardinality: {}, nulls: {} runs: {} ",
|
||||
ENCODING_NAME,
|
||||
self.size(),
|
||||
self.size(false),
|
||||
self.num_rows,
|
||||
self.cardinality(),
|
||||
self.null_count(),
|
||||
|
@ -1000,22 +1005,34 @@ mod test {
|
|||
enc.push_none();
|
||||
enc.push_none();
|
||||
|
||||
// Note: there are 4 index entries to account for NULL entry.
|
||||
// `index_entry` is 24 + (24*4) + 14 == 134
|
||||
// * Self: 24 + 24 + 24 + 1 + (padding 3b) + 4 = 80b
|
||||
// * index entries: (4) are is (24*4) + 14 == 110
|
||||
// * index row ids: (bitmaps) is (4 * 4) + (108b for bitmaps) == 124
|
||||
// * run lengths: (8*5) == 40
|
||||
//
|
||||
// bitmaps for east, north, south and NULL entries.
|
||||
// `index_row_ids` is 24 + (4 * 4) + (108b for bitmaps) == 148
|
||||
//
|
||||
// `run lengths` is 24 + (8*5) == 64
|
||||
//
|
||||
// `contains_null` - 1 byte
|
||||
// `num_rows` - 4 bytes
|
||||
//
|
||||
// 351
|
||||
// 354
|
||||
// assert_eq!(enc.size(false), 354);
|
||||
|
||||
// TODO(edd): there some mystery bytes in the bitmap implementation.
|
||||
// need to figure out how to measure these
|
||||
assert_eq!(enc.size(), 351);
|
||||
// check allocated size
|
||||
let mut enc = RLE::default();
|
||||
enc.index_entries.reserve_exact(39); // account for already-allocated NULL element
|
||||
enc.run_lengths.reserve_exact(40);
|
||||
|
||||
enc.push_additional(Some("east".to_string()), 3);
|
||||
enc.push_additional(Some("north".to_string()), 1);
|
||||
enc.push_additional(Some("east".to_string()), 5);
|
||||
enc.push_additional(Some("south".to_string()), 2);
|
||||
enc.push_none();
|
||||
enc.push_none();
|
||||
enc.push_none();
|
||||
enc.push_none();
|
||||
|
||||
// * Self: 24 + 24 + 24 + 1 + (padding 3b) + 4 = 80b
|
||||
// * index entries: (40 * 24) + 14 == 974
|
||||
// * index row ids: (bitmaps) is (4 * 4) + (108b for bitmaps) == 124
|
||||
// * run lengths: (40 * 8) == 320
|
||||
//
|
||||
assert_eq!(enc.size(true), 1498);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -32,7 +32,7 @@ impl FloatEncoding {
|
|||
/// The total size in bytes of to store columnar data in memory.
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
Self::F64(enc, _) => enc.size(),
|
||||
Self::F64(enc, _) => enc.size(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,8 +27,8 @@ impl IntegerEncoding {
|
|||
/// The total size in bytes of the store columnar data.
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
Self::I64(enc, _) => enc.size(),
|
||||
Self::U64(enc, _) => enc.size(),
|
||||
Self::I64(enc, _) => enc.size(false),
|
||||
Self::U64(enc, _) => enc.size(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -971,13 +971,13 @@ mod test {
|
|||
|
||||
// Input data containing NULL will be stored in an Arrow array encoding
|
||||
let cases = vec![
|
||||
(vec![None, Some(0_i64)], 400_usize), // u8 Arrow array
|
||||
(vec![None, Some(-120_i64)], 400), // i8
|
||||
(vec![None, Some(399_i64)], 400), // u16
|
||||
(vec![None, Some(-399_i64)], 400), // i16
|
||||
(vec![None, Some(u32::MAX as i64)], 400), // u32
|
||||
(vec![None, Some(i32::MIN as i64)], 400), // i32
|
||||
(vec![None, Some(u32::MAX as i64 + 1)], 400), //u64
|
||||
(vec![None, Some(0_i64)], 256_usize), // u8 Arrow array
|
||||
(vec![None, Some(-120_i64)], 256), // i8
|
||||
(vec![None, Some(399_i64)], 256), // u16
|
||||
(vec![None, Some(-399_i64)], 256), // i16
|
||||
(vec![None, Some(u32::MAX as i64)], 256), // u32
|
||||
(vec![None, Some(i32::MIN as i64)], 256), // i32
|
||||
(vec![None, Some(u32::MAX as i64 + 1)], 256), //u64
|
||||
];
|
||||
|
||||
for (case, name) in cases.iter().cloned() {
|
||||
|
@ -1163,10 +1163,10 @@ mod test {
|
|||
|
||||
// Input data containing NULL will be stored in an Arrow array encoding
|
||||
let cases = vec![
|
||||
(vec![None, Some(0_u64)], 400_usize),
|
||||
(vec![None, Some(399_u64)], 400),
|
||||
(vec![None, Some(u32::MAX as u64)], 400),
|
||||
(vec![None, Some(u64::MAX)], 400),
|
||||
(vec![None, Some(0_u64)], 256_usize),
|
||||
(vec![None, Some(399_u64)], 256),
|
||||
(vec![None, Some(u32::MAX as u64)], 256),
|
||||
(vec![None, Some(u64::MAX)], 256),
|
||||
];
|
||||
|
||||
for (case, size) in cases.iter().cloned() {
|
||||
|
|
|
@ -30,8 +30,8 @@ impl StringEncoding {
|
|||
/// The estimated total size in bytes of the in-memory columnar data.
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
Self::RleDictionary(enc) => enc.size(),
|
||||
Self::Dictionary(enc) => enc.size(),
|
||||
Self::RleDictionary(enc) => enc.size(false),
|
||||
Self::Dictionary(enc) => enc.size(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2531,7 +2531,7 @@ mod tests {
|
|||
("svr_id", "1"),
|
||||
])
|
||||
.histogram()
|
||||
.sample_sum_eq(3191.0)
|
||||
.sample_sum_eq(3197.0)
|
||||
.unwrap();
|
||||
|
||||
let rb = collect_read_filter(&rb_chunk).await;
|
||||
|
@ -3400,7 +3400,7 @@ mod tests {
|
|||
id: 2,
|
||||
storage: ChunkStorage::ReadBufferAndObjectStore,
|
||||
lifecycle_action,
|
||||
memory_bytes: 3284, // size of RB and OS chunks
|
||||
memory_bytes: 3140, // size of RB and OS chunks
|
||||
object_store_bytes: 1577, // size of parquet file
|
||||
row_count: 2,
|
||||
time_of_last_access: None,
|
||||
|
@ -3451,7 +3451,7 @@ mod tests {
|
|||
}
|
||||
|
||||
assert_eq!(db.catalog.metrics().memory().mutable_buffer(), 2486 + 87);
|
||||
assert_eq!(db.catalog.metrics().memory().read_buffer(), 2410);
|
||||
assert_eq!(db.catalog.metrics().memory().read_buffer(), 2266);
|
||||
assert_eq!(db.catalog.metrics().memory().object_store(), 874);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue