test: add test for all-NULL float column metadata

pull/24376/head
Marco Neumann 2021-05-17 09:03:47 +02:00
parent 3f9bcf7cd9
commit eae56630fb
2 changed files with 44 additions and 8 deletions

View File

@ -672,7 +672,7 @@ mod tests {
let n_rows = parquet_metadata.md.file_metadata().num_rows() as u64;
assert!(n_rows >= parquet_metadata.md.num_row_groups() as u64);
for summary in &chunk.table_summary().columns {
assert_eq!(summary.count(), n_rows);
assert!(summary.count() <= n_rows);
}
// check column names

View File

@ -259,7 +259,7 @@ fn create_column_field_u64(
fn create_column_field_f64(
name: &str,
data: Vec<Vec<f64>>,
data: Vec<Vec<Option<f64>>>,
arrow_cols: &mut Vec<Vec<(String, ArrayRef, bool)>>,
summaries: &mut Vec<ColumnSummary>,
schema_builder: &mut SchemaBuilder,
@ -280,16 +280,18 @@ fn create_column_field_f64(
min: data
.iter()
.flatten()
.filter_map(|x| x.as_ref())
.filter(|x| !x.is_nan())
.min_by(|a, b| a.partial_cmp(b).unwrap())
.cloned(),
max: data
.iter()
.flatten()
.filter_map(|x| x.as_ref())
.filter(|x| !x.is_nan())
.max_by(|a, b| a.partial_cmp(b).unwrap())
.cloned(),
count: data.iter().map(Vec::len).sum::<usize>() as u64,
count: data.iter().flatten().filter_map(|x| x.as_ref()).count() as u64,
distinct_count: None,
}),
});
@ -463,21 +465,33 @@ pub fn make_record_batch(
// field: f64
create_column_field_f64(
&format!("{}_field_f64_normal", column_prefix),
vec![vec![10.1], vec![20.1], vec![30.1, 40.1]],
vec![
vec![Some(10.1)],
vec![Some(20.1)],
vec![Some(30.1), Some(40.1)],
],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,
);
create_column_field_f64(
&format!("{}_field_f64_inf", column_prefix),
vec![vec![0.0], vec![f64::INFINITY], vec![f64::NEG_INFINITY, 1.0]],
vec![
vec![Some(0.0)],
vec![Some(f64::INFINITY)],
vec![Some(f64::NEG_INFINITY), Some(1.0)],
],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,
);
create_column_field_f64(
&format!("{}_field_f64_zero", column_prefix),
vec![vec![0.0], vec![-0.0], vec![0.0, -0.0]],
vec![
vec![Some(0.0)],
vec![Some(-0.0)],
vec![Some(0.0), Some(-0.0)],
],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,
@ -488,14 +502,36 @@ pub fn make_record_batch(
assert!(nan2.is_nan());
create_column_field_f64(
&format!("{}_field_f64_nan_some", column_prefix),
vec![vec![nan1], vec![2.0], vec![1.0, nan2]],
vec![
vec![Some(nan1)],
vec![Some(2.0)],
vec![Some(1.0), Some(nan2)],
],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,
);
create_column_field_f64(
&format!("{}_field_f64_nan_all", column_prefix),
vec![vec![nan1], vec![nan2], vec![nan1, nan2]],
vec![
vec![Some(nan1)],
vec![Some(nan2)],
vec![Some(nan1), Some(nan2)],
],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,
);
create_column_field_f64(
&format!("{}_field_f64_null_some", column_prefix),
vec![vec![None], vec![Some(20.1)], vec![Some(30.1), None]],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,
);
create_column_field_f64(
&format!("{}_field_f64_null_all", column_prefix),
vec![vec![None], vec![None], vec![None, None]],
&mut arrow_cols,
&mut summaries,
&mut schema_builder,