From d80e71ad8626e63c0feb287c4db764be7a649209 Mon Sep 17 00:00:00 2001 From: Edd Robinson <me@edd.io> Date: Fri, 14 May 2021 10:34:54 +0100 Subject: [PATCH] feat: add new metric to track raw size --- read_buffer/src/chunk.rs | 164 ++++++++++++++++++------------ read_buffer/src/column.rs | 12 ++- read_buffer/src/column/boolean.rs | 2 + read_buffer/src/column/float.rs | 2 + read_buffer/src/column/integer.rs | 2 + read_buffer/src/column/string.rs | 2 + 6 files changed, 114 insertions(+), 70 deletions(-) diff --git a/read_buffer/src/chunk.rs b/read_buffer/src/chunk.rs index b3286e7289..8f7bf88193 100644 --- a/read_buffer/src/chunk.rs +++ b/read_buffer/src/chunk.rs @@ -663,6 +663,26 @@ impl Chunk { .column_bytes_total .add_with_labels(stat.bytes as f64 * sign, labels); + // update raw estimated bytes of NULL values + self.metrics.column_raw_bytes_total.add_with_labels( + (stat.raw_bytes as f64 - stat.raw_bytes_no_null as f64) * sign, + &[ + KeyValue::new("encoding", stat.enc_type), + KeyValue::new("log_data_type", stat.log_data_type), + KeyValue::new("null", "true"), + ], + ); + + // update raw estimated bytes of non-NULL values + self.metrics.column_raw_bytes_total.add_with_labels( + stat.raw_bytes_no_null as f64 * sign, + &[ + KeyValue::new("encoding", stat.enc_type), + KeyValue::new("log_data_type", stat.log_data_type), + KeyValue::new("null", "false"), + ], + ); + // update number of NULL values self.metrics.column_values_total.add_with_labels( stat.nulls as f64 * sign, @@ -703,28 +723,16 @@ pub struct ChunkMetrics { // This metric tracks the total number of bytes used by read buffer columns column_bytes_total: metrics::Gauge, + + // This metric tracks an estimated uncompressed data size for read buffer + // columns, further segmented by nullness. It is a building block for + // tracking a measure of overall compression. 
+ column_raw_bytes_total: metrics::Gauge, } impl ChunkMetrics { pub fn new(registry: &MetricRegistry) -> Self { - let domain = registry.register_domain("read_buffer"); - Self { - columns_total: domain.register_gauge_metric( - "column", - Some("total"), - "The number of columns within the Read Buffer", - ), - column_values_total: domain.register_gauge_metric( - "column", - Some("values"), - "The number of values within columns in the Read Buffer", - ), - column_bytes_total: domain.register_gauge_metric( - "column", - Some("bytes"), - "The number of bytes used by all columns in the Read Buffer", - ), - } + Self::new_with_db(registry, String::new()) } pub fn new_with_db(registry: &MetricRegistry, db: String) -> Self { @@ -746,6 +754,12 @@ impl ChunkMetrics { "column", Some("bytes"), "The number of bytes used by all columns in the Read Buffer", + vec![metrics::KeyValue::new("db", db.clone())], + ), + column_raw_bytes_total: domain.register_gauge_metric_with_labels( + "column_raw", + Some("bytes"), + "The number of bytes used by all columns if they were uncompressed in the Read Buffer", vec![metrics::KeyValue::new("db", db)], ), } @@ -948,7 +962,7 @@ mod test { #[test] fn add_remove_tables() { let reg = metrics::TestMetricRegistry::new(Arc::new(metrics::MetricRegistry::new())); - let metrics = ChunkMetrics::new(&reg.registry()); + let metrics = ChunkMetrics::new_with_db(&reg.registry(), "mydb".to_string()); let chunk = Chunk::new(22, Arc::new(metrics)); // Add a new table to the chunk. 
@@ -1009,29 +1023,39 @@ mod test { assert_eq!( String::from_utf8(reg.registry().metrics_as_text()).unwrap(), vec![ - "# HELP read_buffer_column_bytes The number of bytes used by all columns in the Read Buffer", - "# TYPE read_buffer_column_bytes gauge", - r#"read_buffer_column_bytes{encoding="BT_U32",log_data_type="i64"} 108"#, - r#"read_buffer_column_bytes{encoding="None",log_data_type="bool"} 1152"#, - r#"read_buffer_column_bytes{encoding="None",log_data_type="f64"} 1176"#, - r#"read_buffer_column_bytes{encoding="RLE",log_data_type="string"} 750"#, - r#"# HELP read_buffer_column_total The number of columns within the Read Buffer"#, - r#"# TYPE read_buffer_column_total gauge"#, - r#"read_buffer_column_total{encoding="BT_U32",log_data_type="i64"} 3"#, - r#"read_buffer_column_total{encoding="None",log_data_type="bool"} 3"#, - r#"read_buffer_column_total{encoding="None",log_data_type="f64"} 6"#, - r#"read_buffer_column_total{encoding="RLE",log_data_type="string"} 3"#, - r#"# HELP read_buffer_column_values The number of values within columns in the Read Buffer"#, - r#"# TYPE read_buffer_column_values gauge"#, - r#"read_buffer_column_values{encoding="BT_U32",log_data_type="i64",null="false"} 9"#, - r#"read_buffer_column_values{encoding="BT_U32",log_data_type="i64",null="true"} 0"#, - r#"read_buffer_column_values{encoding="None",log_data_type="bool",null="false"} 9"#, - r#"read_buffer_column_values{encoding="None",log_data_type="bool",null="true"} 0"#, - r#"read_buffer_column_values{encoding="None",log_data_type="f64",null="false"} 15"#, - r#"read_buffer_column_values{encoding="None",log_data_type="f64",null="true"} 3"#, - r#"read_buffer_column_values{encoding="RLE",log_data_type="string",null="false"} 9"#, - r#"read_buffer_column_values{encoding="RLE",log_data_type="string",null="true"} 0"#, - "", + "# HELP read_buffer_column_bytes The number of bytes used by all columns in the Read Buffer", + "# TYPE read_buffer_column_bytes gauge", + 
r#"read_buffer_column_bytes{db="mydb",encoding="BT_U32",log_data_type="i64"} 108"#, + r#"read_buffer_column_bytes{db="mydb",encoding="None",log_data_type="bool"} 1152"#, + r#"read_buffer_column_bytes{db="mydb",encoding="None",log_data_type="f64"} 1176"#, + r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 750"#, + r#"# HELP read_buffer_column_raw_bytes The number of bytes used by all columns if they were uncompressed in the Read Buffer"#, + r#"# TYPE read_buffer_column_raw_bytes gauge"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="BT_U32",log_data_type="i64",null="false"} 144"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="BT_U32",log_data_type="i64",null="true"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="bool",null="false"} 81"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="bool",null="true"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="f64",null="false"} 264"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="f64",null="true"} 24"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="RLE",log_data_type="string",null="false"} 324"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="RLE",log_data_type="string",null="true"} 0"#, + r#"# HELP read_buffer_column_total The number of columns within the Read Buffer"#, + r#"# TYPE read_buffer_column_total gauge"#, + r#"read_buffer_column_total{db="mydb",encoding="BT_U32",log_data_type="i64"} 3"#, + r#"read_buffer_column_total{db="mydb",encoding="None",log_data_type="bool"} 3"#, + r#"read_buffer_column_total{db="mydb",encoding="None",log_data_type="f64"} 6"#, + r#"read_buffer_column_total{db="mydb",encoding="RLE",log_data_type="string"} 3"#, + r#"# HELP read_buffer_column_values The number of values within columns in the Read Buffer"#, + r#"# TYPE read_buffer_column_values gauge"#, + 
r#"read_buffer_column_values{db="mydb",encoding="BT_U32",log_data_type="i64",null="false"} 9"#, + r#"read_buffer_column_values{db="mydb",encoding="BT_U32",log_data_type="i64",null="true"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="bool",null="false"} 9"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="bool",null="true"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="f64",null="false"} 15"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="f64",null="true"} 3"#, + r#"read_buffer_column_values{db="mydb",encoding="RLE",log_data_type="string",null="false"} 9"#, + r#"read_buffer_column_values{db="mydb",encoding="RLE",log_data_type="string",null="true"} 0"#, + "", ] .join("\n") ); @@ -1041,29 +1065,39 @@ mod test { assert_eq!( String::from_utf8(reg.registry().metrics_as_text()).unwrap(), vec![ - "# HELP read_buffer_column_bytes The number of bytes used by all columns in the Read Buffer", - "# TYPE read_buffer_column_bytes gauge", - r#"read_buffer_column_bytes{encoding="BT_U32",log_data_type="i64"} 0"#, - r#"read_buffer_column_bytes{encoding="None",log_data_type="bool"} 0"#, - r#"read_buffer_column_bytes{encoding="None",log_data_type="f64"} 0"#, - r#"read_buffer_column_bytes{encoding="RLE",log_data_type="string"} 0"#, - r#"# HELP read_buffer_column_total The number of columns within the Read Buffer"#, - r#"# TYPE read_buffer_column_total gauge"#, - r#"read_buffer_column_total{encoding="BT_U32",log_data_type="i64"} 0"#, - r#"read_buffer_column_total{encoding="None",log_data_type="bool"} 0"#, - r#"read_buffer_column_total{encoding="None",log_data_type="f64"} 0"#, - r#"read_buffer_column_total{encoding="RLE",log_data_type="string"} 0"#, - r#"# HELP read_buffer_column_values The number of values within columns in the Read Buffer"#, - r#"# TYPE read_buffer_column_values gauge"#, - r#"read_buffer_column_values{encoding="BT_U32",log_data_type="i64",null="false"} 0"#, - 
r#"read_buffer_column_values{encoding="BT_U32",log_data_type="i64",null="true"} 0"#, - r#"read_buffer_column_values{encoding="None",log_data_type="bool",null="false"} 0"#, - r#"read_buffer_column_values{encoding="None",log_data_type="bool",null="true"} 0"#, - r#"read_buffer_column_values{encoding="None",log_data_type="f64",null="false"} 0"#, - r#"read_buffer_column_values{encoding="None",log_data_type="f64",null="true"} 0"#, - r#"read_buffer_column_values{encoding="RLE",log_data_type="string",null="false"} 0"#, - r#"read_buffer_column_values{encoding="RLE",log_data_type="string",null="true"} 0"#, - "", + "# HELP read_buffer_column_bytes The number of bytes used by all columns in the Read Buffer", + "# TYPE read_buffer_column_bytes gauge", + r#"read_buffer_column_bytes{db="mydb",encoding="BT_U32",log_data_type="i64"} 0"#, + r#"read_buffer_column_bytes{db="mydb",encoding="None",log_data_type="bool"} 0"#, + r#"read_buffer_column_bytes{db="mydb",encoding="None",log_data_type="f64"} 0"#, + r#"read_buffer_column_bytes{db="mydb",encoding="RLE",log_data_type="string"} 0"#, + r#"# HELP read_buffer_column_raw_bytes The number of bytes used by all columns if they were uncompressed in the Read Buffer"#, + r#"# TYPE read_buffer_column_raw_bytes gauge"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="BT_U32",log_data_type="i64",null="false"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="BT_U32",log_data_type="i64",null="true"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="bool",null="false"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="bool",null="true"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="f64",null="false"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="None",log_data_type="f64",null="true"} 0"#, + r#"read_buffer_column_raw_bytes{db="mydb",encoding="RLE",log_data_type="string",null="false"} 0"#, + 
r#"read_buffer_column_raw_bytes{db="mydb",encoding="RLE",log_data_type="string",null="true"} 0"#, + r#"# HELP read_buffer_column_total The number of columns within the Read Buffer"#, + r#"# TYPE read_buffer_column_total gauge"#, + r#"read_buffer_column_total{db="mydb",encoding="BT_U32",log_data_type="i64"} 0"#, + r#"read_buffer_column_total{db="mydb",encoding="None",log_data_type="bool"} 0"#, + r#"read_buffer_column_total{db="mydb",encoding="None",log_data_type="f64"} 0"#, + r#"read_buffer_column_total{db="mydb",encoding="RLE",log_data_type="string"} 0"#, + r#"# HELP read_buffer_column_values The number of values within columns in the Read Buffer"#, + r#"# TYPE read_buffer_column_values gauge"#, + r#"read_buffer_column_values{db="mydb",encoding="BT_U32",log_data_type="i64",null="false"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="BT_U32",log_data_type="i64",null="true"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="bool",null="false"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="bool",null="true"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="f64",null="false"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="None",log_data_type="f64",null="true"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="RLE",log_data_type="string",null="false"} 0"#, + r#"read_buffer_column_values{db="mydb",encoding="RLE",log_data_type="string",null="true"} 0"#, + "", ] .join("\n") ); diff --git a/read_buffer/src/column.rs b/read_buffer/src/column.rs index 3b140aad37..2a78bc0309 100644 --- a/read_buffer/src/column.rs +++ b/read_buffer/src/column.rs @@ -1343,11 +1343,13 @@ impl Iterator for RowIDsIterator<'_> { // Statistics about the composition of a column pub(crate) struct Statistics { - pub enc_type: &'static str, - pub log_data_type: &'static str, - pub values: u32, - pub nulls: u32, - pub bytes: usize, + pub enc_type: &'static str, // The encoding type + pub 
log_data_type: &'static str, // The logical data-type + pub values: u32, // Number of values present (NULL and non-NULL) + pub nulls: u32, // Number of NULL values present + pub bytes: usize, // Total size of data + pub raw_bytes: usize, // Estimated "uncompressed" size + pub raw_bytes_no_null: usize, // Estimated "uncompressed" size ignoring NULL values } #[cfg(test)] diff --git a/read_buffer/src/column/boolean.rs b/read_buffer/src/column/boolean.rs index 47e70ca9b6..26d6570543 100644 --- a/read_buffer/src/column/boolean.rs +++ b/read_buffer/src/column/boolean.rs @@ -39,6 +39,8 @@ impl BooleanEncoding { values: self.num_rows(), nulls: self.null_count(), bytes: self.size(), + raw_bytes: self.size_raw(true), + raw_bytes_no_null: self.size_raw(false), } } diff --git a/read_buffer/src/column/float.rs b/read_buffer/src/column/float.rs index be4fb43abd..c9d3967a58 100644 --- a/read_buffer/src/column/float.rs +++ b/read_buffer/src/column/float.rs @@ -49,6 +49,8 @@ impl FloatEncoding { values: self.num_rows(), nulls: self.null_count(), bytes: self.size(), + raw_bytes: self.size_raw(true), + raw_bytes_no_null: self.size_raw(false), } } diff --git a/read_buffer/src/column/integer.rs b/read_buffer/src/column/integer.rs index 4e6cbadb79..aaf03dbc85 100644 --- a/read_buffer/src/column/integer.rs +++ b/read_buffer/src/column/integer.rs @@ -99,6 +99,8 @@ impl IntegerEncoding { values: self.num_rows(), nulls: self.null_count(), bytes: self.size(), + raw_bytes: self.size_raw(true), + raw_bytes_no_null: self.size_raw(false), } } diff --git a/read_buffer/src/column/string.rs b/read_buffer/src/column/string.rs index a0a200e547..7f34f327cd 100644 --- a/read_buffer/src/column/string.rs +++ b/read_buffer/src/column/string.rs @@ -80,6 +80,8 @@ impl StringEncoding { values: self.num_rows(), nulls: self.null_count(), bytes: self.size(), + raw_bytes: self.size_raw(true), + raw_bytes_no_null: self.size_raw(false), } }