From eae56630fb6151f916d93900440bf331e52ed9a3 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Mon, 17 May 2021 09:03:47 +0200 Subject: [PATCH] test: add test for all-NULL float column metadata --- parquet_file/src/metadata.rs | 2 +- parquet_file/src/test_utils.rs | 50 +++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index e00bec6248..7fa5a8762b 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -672,7 +672,7 @@ mod tests { let n_rows = parquet_metadata.md.file_metadata().num_rows() as u64; assert!(n_rows >= parquet_metadata.md.num_row_groups() as u64); for summary in &chunk.table_summary().columns { - assert_eq!(summary.count(), n_rows); + assert!(summary.count() <= n_rows); } // check column names diff --git a/parquet_file/src/test_utils.rs b/parquet_file/src/test_utils.rs index 37c8932497..9cb2111b69 100644 --- a/parquet_file/src/test_utils.rs +++ b/parquet_file/src/test_utils.rs @@ -259,7 +259,7 @@ fn create_column_field_u64( fn create_column_field_f64( name: &str, - data: Vec>, + data: Vec>>, arrow_cols: &mut Vec>, summaries: &mut Vec, schema_builder: &mut SchemaBuilder, @@ -280,16 +280,18 @@ fn create_column_field_f64( min: data .iter() .flatten() + .filter_map(|x| x.as_ref()) .filter(|x| !x.is_nan()) .min_by(|a, b| a.partial_cmp(b).unwrap()) .cloned(), max: data .iter() .flatten() + .filter_map(|x| x.as_ref()) .filter(|x| !x.is_nan()) .max_by(|a, b| a.partial_cmp(b).unwrap()) .cloned(), - count: data.iter().map(Vec::len).sum::() as u64, + count: data.iter().flatten().filter_map(|x| x.as_ref()).count() as u64, distinct_count: None, }), }); @@ -463,21 +465,33 @@ pub fn make_record_batch( // field: f64 create_column_field_f64( &format!("{}_field_f64_normal", column_prefix), - vec![vec![10.1], vec![20.1], vec![30.1, 40.1]], + vec![ + vec![Some(10.1)], + vec![Some(20.1)], + vec![Some(30.1), Some(40.1)], + ], &mut arrow_cols, &mut summaries, &mut schema_builder, ); create_column_field_f64( &format!("{}_field_f64_inf", column_prefix), - vec![vec![0.0], vec![f64::INFINITY], vec![f64::NEG_INFINITY, 1.0]], + vec![ + vec![Some(0.0)], + vec![Some(f64::INFINITY)], + vec![Some(f64::NEG_INFINITY), Some(1.0)], + ], &mut arrow_cols, &mut summaries, &mut schema_builder, ); create_column_field_f64( &format!("{}_field_f64_zero", column_prefix), - vec![vec![0.0], vec![-0.0], vec![0.0, -0.0]], + vec![ + vec![Some(0.0)], + vec![Some(-0.0)], + vec![Some(0.0), Some(-0.0)], + ], &mut arrow_cols, &mut summaries, &mut schema_builder, @@ -488,14 +502,36 @@ pub fn make_record_batch( assert!(nan2.is_nan()); create_column_field_f64( &format!("{}_field_f64_nan_some", column_prefix), - vec![vec![nan1], vec![2.0], vec![1.0, nan2]], + vec![ + vec![Some(nan1)], + vec![Some(2.0)], + vec![Some(1.0), Some(nan2)], + ], &mut arrow_cols, &mut summaries, &mut schema_builder, ); create_column_field_f64( &format!("{}_field_f64_nan_all", column_prefix), - vec![vec![nan1], vec![nan2], vec![nan1, nan2]], + vec![ + vec![Some(nan1)], + vec![Some(nan2)], + vec![Some(nan1), Some(nan2)], + ], + &mut arrow_cols, + &mut summaries, + &mut schema_builder, + ); + create_column_field_f64( + &format!("{}_field_f64_null_some", column_prefix), + vec![vec![None], vec![Some(20.1)], vec![Some(30.1), None]], + &mut arrow_cols, + &mut summaries, + &mut schema_builder, + ); + create_column_field_f64( + &format!("{}_field_f64_null_all", column_prefix), + vec![vec![None], vec![None], vec![None, None]], &mut arrow_cols, &mut summaries, &mut schema_builder,