fix: Handle all null columns in ColumnSummary::update_from (#2251)
parent
7504a16dbb
commit
0c300a4abb
|
@ -135,15 +135,24 @@ impl TableSummary {
|
||||||
pub fn update_from(&mut self, other: &Self) {
|
pub fn update_from(&mut self, other: &Self) {
|
||||||
assert_eq!(self.name, other.name);
|
assert_eq!(self.name, other.name);
|
||||||
|
|
||||||
|
let new_total_count = self.total_count() + other.total_count();
|
||||||
|
|
||||||
|
// update all existing columns
|
||||||
for col in &mut self.columns {
|
for col in &mut self.columns {
|
||||||
if let Some(other_col) = other.column(&col.name) {
|
if let Some(other_col) = other.column(&col.name) {
|
||||||
col.update_from(other_col);
|
col.update_from(other_col);
|
||||||
|
} else {
|
||||||
|
col.update_to_total_count(new_total_count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add any columns that were new
|
||||||
for col in &other.columns {
|
for col in &other.columns {
|
||||||
if self.column(&col.name).is_none() {
|
if self.column(&col.name).is_none() {
|
||||||
self.columns.push(col.clone());
|
let mut new_col = col.clone();
|
||||||
|
// ensure the count is consistent
|
||||||
|
new_col.update_to_total_count(new_total_count);
|
||||||
|
self.columns.push(new_col);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -236,6 +245,21 @@ impl ColumnSummary {
|
||||||
(Statistics::String(_), _) => unreachable!(),
|
(Statistics::String(_), _) => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Updates these statistics so that that the total length of this
|
||||||
|
/// column is `len` rows, padding it with trailing NULLs if
|
||||||
|
/// necessary
|
||||||
|
pub fn update_to_total_count(&mut self, len: u64) {
|
||||||
|
let total_count = self.total_count();
|
||||||
|
assert!(
|
||||||
|
total_count <= len,
|
||||||
|
"trying to shrink column stats from {} to {}",
|
||||||
|
total_count,
|
||||||
|
len
|
||||||
|
);
|
||||||
|
let delta = len - total_count;
|
||||||
|
self.stats.update_for_nulls(delta);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Column name, statistics which encode type information
|
/// Column name, statistics which encode type information
|
||||||
|
@ -352,6 +376,17 @@ impl Statistics {
|
||||||
_ => std::mem::size_of::<Self>(),
|
_ => std::mem::size_of::<Self>(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Update the statistics values to account for `num_nulls` additional null values
|
||||||
|
pub fn update_for_nulls(&mut self, num_nulls: u64) {
|
||||||
|
match self {
|
||||||
|
Self::I64(v) => v.update_for_nulls(num_nulls),
|
||||||
|
Self::U64(v) => v.update_for_nulls(num_nulls),
|
||||||
|
Self::F64(v) => v.update_for_nulls(num_nulls),
|
||||||
|
Self::Bool(v) => v.update_for_nulls(num_nulls),
|
||||||
|
Self::String(v) => v.update_for_nulls(num_nulls),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Summary statistics for a column.
|
/// Summary statistics for a column.
|
||||||
|
@ -971,7 +1006,7 @@ mod tests {
|
||||||
let col = table_a.column("float").unwrap();
|
let col = table_a.column("float").unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
col.stats,
|
col.stats,
|
||||||
Statistics::F64(StatValues::new_non_null(Some(1.3), Some(9.1), 2))
|
Statistics::F64(StatValues::new(Some(1.3), Some(9.1), 4, 2))
|
||||||
);
|
);
|
||||||
|
|
||||||
table_b.update_from(&table_c);
|
table_b.update_from(&table_c);
|
||||||
|
@ -994,10 +1029,77 @@ mod tests {
|
||||||
let col = table_b.column("float").unwrap();
|
let col = table_b.column("float").unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
col.stats,
|
col.stats,
|
||||||
Statistics::F64(StatValues::new_non_null(Some(1.3), Some(9.1), 2))
|
Statistics::F64(StatValues::new(Some(1.3), Some(9.1), 4, 2))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn table_update_from_new_column() {
|
||||||
|
let string_stats = StatValues::new_with_value("bar".to_string());
|
||||||
|
let string_col = ColumnSummary {
|
||||||
|
name: "string".to_string(),
|
||||||
|
influxdb_type: None,
|
||||||
|
stats: Statistics::String(string_stats),
|
||||||
|
};
|
||||||
|
|
||||||
|
let int_stats = StatValues::new_with_value(5);
|
||||||
|
let int_col = ColumnSummary {
|
||||||
|
name: "int".to_string(),
|
||||||
|
influxdb_type: None,
|
||||||
|
stats: Statistics::I64(int_stats),
|
||||||
|
};
|
||||||
|
|
||||||
|
// table summary that does not have the "string" col
|
||||||
|
let table1 = TableSummary {
|
||||||
|
name: "the_table".to_string(),
|
||||||
|
columns: vec![int_col.clone()],
|
||||||
|
};
|
||||||
|
|
||||||
|
// table summary that has both columns
|
||||||
|
let table2 = TableSummary {
|
||||||
|
name: "the_table".to_string(),
|
||||||
|
columns: vec![int_col, string_col],
|
||||||
|
};
|
||||||
|
|
||||||
|
// Statistics should be the same regardless of the order we update the stats
|
||||||
|
|
||||||
|
let expected_string_stats = Statistics::String(StatValues::new(
|
||||||
|
Some("bar".to_string()),
|
||||||
|
Some("bar".to_string()),
|
||||||
|
2, // total count is 2 even though did not appear in the update
|
||||||
|
1, // 1 null
|
||||||
|
));
|
||||||
|
|
||||||
|
let expected_int_stats = Statistics::I64(StatValues::new(
|
||||||
|
Some(5),
|
||||||
|
Some(5),
|
||||||
|
2,
|
||||||
|
0, // no nulls
|
||||||
|
));
|
||||||
|
|
||||||
|
// update table 1 with table 2
|
||||||
|
let mut table = table1.clone();
|
||||||
|
table.update_from(&table2);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
&table.column("string").unwrap().stats,
|
||||||
|
&expected_string_stats
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(&table.column("int").unwrap().stats, &expected_int_stats);
|
||||||
|
|
||||||
|
// update table 2 with table 1
|
||||||
|
let mut table = table2;
|
||||||
|
table.update_from(&table1);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
&table.column("string").unwrap().stats,
|
||||||
|
&expected_string_stats
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(&table.column("int").unwrap().stats, &expected_int_stats);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_table_summaries() {
|
fn from_table_summaries() {
|
||||||
let mut string_stats = StatValues::new_with_value("foo".to_string());
|
let mut string_stats = StatValues::new_with_value("foo".to_string());
|
||||||
|
@ -1035,10 +1137,11 @@ mod tests {
|
||||||
let col = t.column("string").unwrap();
|
let col = t.column("string").unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
col.stats,
|
col.stats,
|
||||||
Statistics::String(StatValues::new_non_null(
|
Statistics::String(StatValues::new(
|
||||||
Some("bar".to_string()),
|
Some("bar".to_string()),
|
||||||
Some("foo".to_string()),
|
Some("foo".to_string()),
|
||||||
2,
|
3, // no entry for "string" column in table_a_2
|
||||||
|
1,
|
||||||
))
|
))
|
||||||
);
|
);
|
||||||
let col = t.column("int").unwrap();
|
let col = t.column("int").unwrap();
|
||||||
|
|
Loading…
Reference in New Issue