feat: expose all column stats
parent
88ed58aa8a
commit
c4987028fb
|
@ -153,6 +153,18 @@ impl Column {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns statistics about the physical layout of columns
|
||||
pub(crate) fn storage_stats(&self) -> Statistics {
|
||||
match &self {
|
||||
Self::String(_, data) => data.storage_stats(),
|
||||
Self::Float(_, data) => data.storage_stats(),
|
||||
Self::Integer(_, data) => data.storage_stats(),
|
||||
Self::Unsigned(_, data) => data.storage_stats(),
|
||||
Self::Bool(_, data) => data.storage_stats(),
|
||||
Self::ByteArray(_, data) => data.storage_stats(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn properties(&self) -> &ColumnProperties {
|
||||
match &self {
|
||||
Self::String(meta, _) => &meta.properties,
|
||||
|
@ -1309,6 +1321,15 @@ impl Iterator for RowIDsIterator<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Statistics about the composition of a column.
///
/// Instances are produced by the `storage_stats` methods of the column
/// encodings. The common traits are derived so that values can be logged,
/// copied and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Statistics {
    /// Name of the encoding that stores the column data.
    enc_type: &'static str,
    /// Label for the column's data type (for example `"bool"`).
    log_data_type: &'static str,
    /// Row count as reported by the encoding's `num_rows`.
    values: u32,
    /// Number of NULL values as reported by the encoding's `null_count`.
    nulls: u32,
    /// Size in bytes as reported by the encoding's `size`.
    bytes: usize,
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use super::cmp;
|
||||
use super::encoding::bool::Bool;
|
||||
use super::{cmp, Statistics};
|
||||
use crate::column::{RowIDs, Value, Values};
|
||||
|
||||
/// Encodings for boolean values.
|
||||
|
@ -22,6 +22,17 @@ impl BooleanEncoding {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns statistics about the physical layout of columns
|
||||
pub(crate) fn storage_stats(&self) -> Statistics {
|
||||
Statistics {
|
||||
enc_type: self.name(),
|
||||
log_data_type: "bool",
|
||||
values: self.num_rows(),
|
||||
nulls: self.null_count(),
|
||||
bytes: self.size(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines if the column contains a NULL value.
|
||||
pub fn contains_null(&self) -> bool {
|
||||
match self {
|
||||
|
@ -29,6 +40,13 @@ impl BooleanEncoding {
|
|||
}
|
||||
}
|
||||
|
||||
/// The total number of rows in the column.
|
||||
pub fn null_count(&self) -> u32 {
|
||||
match self {
|
||||
Self::BooleanNull(enc) => enc.null_count(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines if the column contains a non-null value.
|
||||
pub fn has_any_non_null_value(&self) -> bool {
|
||||
match self {
|
||||
|
@ -106,6 +124,13 @@ impl BooleanEncoding {
|
|||
Self::BooleanNull(c) => c.count(row_ids),
|
||||
}
|
||||
}
|
||||
|
||||
/// The name of this encoding.
|
||||
pub fn name(&self) -> &'static str {
|
||||
match &self {
|
||||
Self::BooleanNull(_) => "None",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BooleanEncoding {
|
||||
|
|
|
@ -36,6 +36,10 @@ impl Bool {
|
|||
self.arr.null_count() > 0
|
||||
}
|
||||
|
||||
pub fn null_count(&self) -> u32 {
|
||||
self.arr.null_count() as u32
|
||||
}
|
||||
|
||||
/// Returns an estimation of the total size in bytes used by this column
|
||||
/// encoding.
|
||||
pub fn size(&self) -> usize {
|
||||
|
|
|
@ -12,7 +12,7 @@ use hashbrown::{hash_map, HashMap};
|
|||
use itertools::Itertools;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
|
||||
use crate::column::{cmp::Operator, Column, RowIDs, RowIDsOption};
|
||||
use crate::column::{self, cmp::Operator, Column, RowIDs, RowIDsOption};
|
||||
use crate::schema;
|
||||
use crate::schema::{AggregateType, LogicalDataType, ResultSchema};
|
||||
use crate::value::{
|
||||
|
@ -1059,6 +1059,10 @@ impl RowGroup {
|
|||
|
||||
dst
|
||||
}
|
||||
|
||||
/// Returns the storage statistics of every column in this row group, in
/// the order the columns are stored.
pub(crate) fn column_storage_statistics(&self) -> Vec<column::Statistics> {
    let mut all_stats = Vec::new();
    for col in self.columns.iter() {
        all_stats.push(col.storage_stats());
    }
    all_stats
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for &RowGroup {
|
||||
|
|
|
@ -12,9 +12,12 @@ use arrow::record_batch::RecordBatch;
|
|||
use data_types::{chunk::ChunkColumnSummary, partition_metadata::TableSummary};
|
||||
use internal_types::selection::Selection;
|
||||
|
||||
use crate::row_group::{self, ColumnName, Predicate, RowGroup};
|
||||
use crate::schema::{AggregateType, ColumnType, LogicalDataType, ResultSchema};
|
||||
use crate::value::{OwnedValue, Scalar, Value};
|
||||
use crate::{
|
||||
column,
|
||||
row_group::{self, ColumnName, Predicate, RowGroup},
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
|
@ -513,6 +516,16 @@ impl Table {
|
|||
.iter()
|
||||
.any(|row_group| row_group.satisfies_predicate(predicate))
|
||||
}
|
||||
|
||||
/// Returns the storage statistics of every column in every row group of
/// this table.
///
/// The result is a single flat list: the statistics of the first row
/// group's columns, followed by those of the second, and so on. The guard
/// obtained from `self.table_data.read()` is held while the row groups are
/// visited.
pub(crate) fn column_storage_statistics(&self) -> Vec<column::Statistics> {
    let table_data = self.table_data.read();
    table_data
        .data
        .iter()
        // `flat_map` replaces the equivalent `.map(..).flatten()` pair.
        .flat_map(|rg| rg.column_storage_statistics())
        .collect()
}
|
||||
}
|
||||
|
||||
// TODO(edd): reduce owned strings here by, e.g., using references as keys.
|
||||
|
|
Loading…
Reference in New Issue