test: add property tests for byte trimmer

pull/24376/head
Edd Robinson 2022-04-13 14:23:45 +01:00
parent cf0d048037
commit e548ba7b17
4 changed files with 171 additions and 3 deletions

41
Cargo.lock generated
View File

@ -427,6 +427,21 @@ dependencies = [
"shlex",
]
[[package]]
name = "bit-set"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -1867,7 +1882,7 @@ dependencies = [
"log",
"pest",
"pest_derive",
"quick-error",
"quick-error 2.0.1",
"serde",
"serde_json",
]
@ -4402,15 +4417,18 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
dependencies = [
"bit-set",
"bitflags",
"byteorder",
"lazy_static",
"num-traits",
"quick-error",
"quick-error 2.0.1",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
]
[[package]]
@ -4605,6 +4623,12 @@ dependencies = [
"workspace-hack",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quick-error"
version = "2.0.1"
@ -4741,6 +4765,7 @@ dependencies = [
"packers",
"parking_lot 0.12.0",
"permutation",
"proptest",
"rand",
"rand_distr",
"schema",
@ -5144,6 +5169,18 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f"
[[package]]
name = "rusty-fork"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
dependencies = [
"fnv",
"quick-error 1.2.3",
"tempfile",
"wait-timeout",
]
[[package]]
name = "rustyline"
version = "9.1.2"

View File

@ -30,6 +30,7 @@ workspace-hack = { path = "../workspace-hack"}
[dev-dependencies] # In alphabetical order
criterion = "0.3.3"
packers = { path = "../packers" }
proptest = "1.0.0"
rand = "0.8.3"
rand_distr = "0.4.2"
test_helpers = { path = "../test_helpers" }

View File

@ -0,0 +1,13 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 836582419d74d26bd8cf2f79d7bc39825100fe2d597d704fde9b7669c9f9b206 # shrinks to arr = []
cc 503e571344ed7bf06f55f02058ad7ca9c53561652d04f8ff8583b8690548503c # shrinks to arr = []
cc 0867ea10ca74e47a208731accc4ff17826a5c1a05f69cbab7c0e7db73886c3cb # shrinks to arr = []
cc 98b86d843e5d9d685c3e9ea505f15b26c247d7f7e478d98037cbfdb5c7ec7791 # shrinks to arr = []
cc 7adf8aadb9a325578d038998c20c1b7f4af0d6cd7bc16c5fa4379cf9a4471466 # shrinks to arr = []
cc 5282c6eeaef6a1096e4bcb9ac460db53d9a815eca9d8a0f29b53d70ffe78f775 # shrinks to arr = []
cc 7319175e674e0e859251ffc3c136cc163010c2ef0df90f830d067b99964081c6 # shrinks to arr = []

View File

@ -516,8 +516,9 @@ mod test {
use std::sync::Arc;
use arrow::datatypes::*;
use proptest::prelude::*;
use super::super::transcoders::MockTranscoder;
use super::super::transcoders::{ByteTrimmer, MockTranscoder, NoOpTranscoder};
use super::cmp::Operator;
use super::*;
@ -908,4 +909,120 @@ mod test {
let (v, _) = new_mock_encoding(vec![None, Some(100), Some(222)]);
assert!(v.has_any_non_null_value());
}
// Generates a property test that compares the integer byte-trimming encoding
// against an uncompressed control encoding.
//
// Macro arguments, in order:
//   - `$logical`: the logical integer type the column exposes;
//   - `$logical_arrow`: the arrow type backing the control column;
//   - `$physical`: the narrower integer type values are trimmed down to;
//   - `$physical_arrow`: the arrow type backing the trimmed column;
//   - `$fn_name`: the name given to the generated test function.
macro_rules! make_test_transcoder_integer_bytetrimer {
    (($logical:ty, $logical_arrow:ty, $physical:ty, $physical_arrow:ty, $fn_name:ident)) => {
        proptest! {
            #[test]
            // Input strategy: vectors holding between 0 and 50 optional values.
            // Every present value is drawn from the physical type's `MIN..=MAX`
            // range (widened to the logical type), so it can always be encoded.
            // `0.9` is the probability of generating `Some`, which leaves
            // roughly one value in ten null.
            fn $fn_name(arr in prop::collection::vec(proptest::option::weighted(0.9, <$physical>::MIN as $logical ..=<$physical>::MAX as $logical), 0..=50)) {
                // Reference column: the logical values stored without any
                // compression behind a pass-through transcoder. The encoding
                // under test must behave exactly like this one.
                let control = FixedNull::new(
                    PrimitiveArray::<$logical_arrow>::from(arr.clone()),
                    NoOpTranscoder {},
                );

                // Column under test: every non-null logical value is encoded
                // into the physical type before being stored.
                let transcoder = ByteTrimmer {};
                let byte_trimmed = FixedNull::<$physical_arrow, $logical, _>::new(
                    arr.into_iter()
                        .map(|value| value.map(|value| transcoder.encode(value)))
                        .collect::<PrimitiveArray<_>>(),
                    transcoder,
                );

                // Probe values spread over the physical type's range,
                // including both extremes and their immediate neighbours.
                let probes = [
                    <$physical>::MIN,
                    <$physical>::MIN + 1,
                    <$physical>::MAX / 10,
                    <$physical>::MAX / 4,
                    <$physical>::MAX / 2,
                    <$physical>::MAX - 1,
                    <$physical>::MAX,
                ];

                // Run every comparison operator against every probe value and
                // require both columns to select the same rows.
                for op in ["<", "<=", ">", ">=", "=", "!="] {
                    let operator = cmp::Operator::try_from(op).unwrap();

                    for probe in probes {
                        let needle = probe as $logical;

                        let row_ids_control =
                            control.row_ids_filter(needle, &operator, RowIDs::new_vector());
                        let row_ids_trimmed =
                            byte_trimmed.row_ids_filter(needle, &operator, RowIDs::new_vector());

                        prop_assert_eq!(row_ids_control, row_ids_trimmed);
                    }
                }
            }
        }
    };
}
// Instantiate the byte trimmer property test once per logical/physical
// pairing. The arguments are, in order: logical type, logical arrow type,
// physical type, physical arrow type, and the generated test function's name.
//
// NOTE(review): there is no `i64` -> `i32` or `i64` -> `u32` instantiation in
// this list, while `u64` -> `u32` is covered — confirm whether that gap is
// intentional.
//
// `u64` logical values stored physically as `u8`.
make_test_transcoder_integer_bytetrimer!((
u64,
UInt64Type,
u8,
UInt8Type,
test_transcoder_byte_trim_u64_to_u8
));
// `u64` logical values stored physically as `u16`.
make_test_transcoder_integer_bytetrimer!((
u64,
UInt64Type,
u16,
UInt16Type,
test_transcoder_byte_trim_u64_to_u16
));
// `u64` logical values stored physically as `u32`.
make_test_transcoder_integer_bytetrimer!((
u64,
UInt64Type,
u32,
UInt32Type,
test_transcoder_byte_trim_u64_to_u32
));
// `i64` logical values stored physically as `i8`.
make_test_transcoder_integer_bytetrimer!((
i64,
Int64Type,
i8,
Int8Type,
test_transcoder_byte_trim_i64_to_i8
));
// `i64` logical values stored physically as `u8`.
make_test_transcoder_integer_bytetrimer!((
i64,
Int64Type,
u8,
UInt8Type,
test_transcoder_byte_trim_i64_to_u8
));
// `i64` logical values stored physically as `i16`.
make_test_transcoder_integer_bytetrimer!((
i64,
Int64Type,
i16,
Int16Type,
test_transcoder_byte_trim_i64_to_i16
));
// `i64` logical values stored physically as `u16`.
make_test_transcoder_integer_bytetrimer!((
i64,
Int64Type,
u16,
UInt16Type,
test_transcoder_byte_trim_i64_to_u16
));
}