Merge pull request #21 from influxdata/er-encoder-bench

test: add encoder/decoder benchmarks
pull/24376/head
Edd Robinson 2020-02-28 13:05:00 +00:00 committed by GitHub
commit bfe17259f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 45804 additions and 294 deletions

563
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -39,10 +39,17 @@ croaring = "0.4.2"
http = "0.2.0" http = "0.2.0"
serde_urlencoded = "0.6.1" serde_urlencoded = "0.6.1"
[build-dependencies]
tonic-build = "0.1.1"
[dev-dependencies] [dev-dependencies]
criterion = "0.3"
reqwest = { version = "0.10.1", features = ["blocking"] } reqwest = { version = "0.10.1", features = ["blocking"] }
assert_cmd = "0.12.0" assert_cmd = "0.12.0"
rand = "0.7.2" rand = "0.7.2"
[[bench]]
name = "encoders"
harness = false
[build-dependencies]
tonic-build = "0.1.1"
rand = "0.7.2"

352
benches/encoders.rs Normal file
View File

@ -0,0 +1,352 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rand::{distributions::Uniform, Rng};
use std::convert::TryFrom;
use std::mem;
mod fixtures;
// Batch sizes (number of values per benchmark input) used by the sequential
// and random benchmarks in this file.
const LARGER_BATCH_SIZES: [usize; 12] = [
10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 50_000, 100_000,
];
// Batch sizes used by the benchmarks that slice
// `fixtures::CPU_F64_EXAMPLE_VALUES`. NOTE(review): presumably capped at 45_000
// because the fixture holds about that many values -- confirm against
// benches/fixtures.rs.
const SMALLER_BATCH_SIZES: [usize; 11] =
[10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 45_000];
// Signature shared by the encoder functions handed to `benchmark_encode`:
// encodes `src` into the byte buffer `dst`.
type EncodeFn<T> = fn(src: &[T], dst: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>>;
// Signature shared by the decoder functions handed to `benchmark_decode`:
// decodes the bytes in `src` into the value vector `dst`.
type DecodeFn<T> = fn(src: &[u8], dst: &mut Vec<T>) -> Result<(), Box<dyn std::error::Error>>;
/// Benchmarks `encode` over the sequential inputs `1, 2, ..., batch_size` for
/// every entry in `batch_sizes`.
///
/// Values are produced through `convert_from_usize`, which goes via `i32`, so
/// each batch size must fit in an `i32` or input generation panics.
fn benchmark_encode_sequential<T: From<i32>>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    encode: EncodeFn<T>,
) {
    benchmark_encode(
        c,
        benchmark_group_name,
        batch_sizes,
        // Inclusive upper bound: `1..batch_size` would yield one value fewer
        // than the batch size `benchmark_encode` uses for its throughput and
        // benchmark id.
        |batch_size| (1..=batch_size).map(convert_from_usize).collect(),
        encode,
    );
}
/// Runs a Criterion benchmark group that measures `encode` at each batch size.
///
/// For every entry in `batch_sizes` the throughput is reported as
/// `batch_size * size_of::<T>()` bytes. The input values are generated once per
/// benchmark by `decoded_value_generation`, and a single output buffer is
/// reused across iterations.
fn benchmark_encode<T>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    decoded_value_generation: fn(batch_size: usize) -> Vec<T>,
    encode: EncodeFn<T>,
) {
    let mut group = c.benchmark_group(benchmark_group_name);

    for &batch_size in batch_sizes {
        let raw_bytes = batch_size * mem::size_of::<T>();
        group.throughput(Throughput::Bytes(u64::try_from(raw_bytes).unwrap()));

        let id = BenchmarkId::from_parameter(batch_size);
        group.bench_with_input(id, &batch_size, |bencher, &size| {
            let input = decoded_value_generation(size);
            let mut output = Vec::new();
            bencher.iter(|| encode(&input, &mut output).unwrap());
        });
    }

    group.finish();
}
/// Runs a Criterion benchmark group that measures `decode` at each batch size.
///
/// For every entry in `batch_sizes`, `input_value_generation` is called once
/// and returns the number of decoded values together with the encoded bytes.
/// Throughput is reported in encoded bytes. The output vector is created once
/// with capacity for the decoded values and emptied before every decode.
fn benchmark_decode<T>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    input_value_generation: fn(batch_size: usize) -> (usize, Vec<u8>),
    decode: DecodeFn<T>,
) {
    let mut group = c.benchmark_group(benchmark_group_name);

    for &batch_size in batch_sizes {
        let (decoded_len, encoded) = input_value_generation(batch_size);
        let encoded_bytes = u64::try_from(encoded.len()).unwrap();
        group.throughput(Throughput::Bytes(encoded_bytes));

        let id = BenchmarkId::from_parameter(batch_size);
        group.bench_with_input(id, &decoded_len, |bencher, &capacity| {
            let mut output = Vec::with_capacity(capacity);
            bencher.iter(|| {
                output.clear();
                decode(&encoded, &mut output).unwrap();
            });
        });
    }

    group.finish();
}
/// Converts `a` into `T` by way of `i32`.
///
/// Panics if `a` does not fit in an `i32`.
fn convert_from_usize<T: From<i32>>(a: usize) -> T {
    let narrowed = i32::try_from(a).unwrap();
    T::from(narrowed)
}
// The current float encoder produces the following compression:
//
// values block size compression
// 10 33 26.4 bits/value
// 25 52 16.64 bits/value
// 50 78 12.48 bits/value
// 100 129 10.32 bits/value
// 250 290 9.28 bits/value
// 500 584 9.34 bits/value
// 750 878 9.36 bits/value
// 1000 1221 9.76 bits/value
// 5000 7013 11.22 bits/value
// 10000 15145 12.11 bits/value
// 50000 90090 14.41 bits/value
// 100000 192481 15.39 bits/value
//
/// Benchmarks `delorean::encoders::float::encode` on sequential values for each
/// size in `LARGER_BATCH_SIZES`, under the group name
/// "float_encode_sequential".
fn float_encode_sequential(c: &mut Criterion) {
benchmark_encode_sequential(
c,
"float_encode_sequential",
&LARGER_BATCH_SIZES,
delorean::encoders::float::encode,
);
}
// The current integer encoder produces the following compression. Note, since
// a sequential range of values can be encoded using RLE the compression
// statistics are not very interesting.
//
// values block size compression
// 10 11 8.80 bits/value
// 25 11 3.52 bits/value
// 50 11 1.76 bits/value
// 100 11 0.88 bits/value
// 250 12 0.38 bits/value
// 500 12 0.19 bits/value
// 750 12 0.12 bits/value
// 1000 12 0.09 bits/value
// 5000 12 0.01 bits/value
// 10000 12 0.00 bits/value
// 50000 13 0.00 bits/value
// 100000 13 0.00 bits/value
//
/// Benchmarks `delorean::encoders::integer::encode` on sequential values for
/// each size in `LARGER_BATCH_SIZES`, under the group name
/// "integer_encode_sequential".
fn integer_encode_sequential(c: &mut Criterion) {
benchmark_encode_sequential(
c,
"integer_encode_sequential",
&LARGER_BATCH_SIZES,
delorean::encoders::integer::encode,
);
}
/// Benchmarks `delorean::encoders::timestamp::encode` on sequential values for
/// each size in `LARGER_BATCH_SIZES`, under the group name
/// "timestamp_encode_sequential".
fn timestamp_encode_sequential(c: &mut Criterion) {
benchmark_encode_sequential(
c,
"timestamp_encode_sequential",
&LARGER_BATCH_SIZES,
delorean::encoders::timestamp::encode,
);
}
// The current float encoder produces the following compression:
//
// values block size compression
// 10 32 25.6 bits/value
// 25 76 24.32 bits/value
// 50 86 13.76 bits/value
// 100 167 13.36 bits/value
// 250 388 12.41 bits/value
// 500 1165 18.64 bits/value
// 750 1769 18.86 bits/value
// 1000 2366 18.92 bits/value
// 5000 11785 18.85 bits/value
// 10000 23559 18.84 bits/value
// 50000 117572 18.81 bits/value
// 100000 235166 18.81 bits/value
//
/// Benchmarks `delorean::encoders::float::encode` on `batch_size` random floats
/// drawn uniformly from `0.0..100.0`, for each size in `LARGER_BATCH_SIZES`.
fn float_encode_random(c: &mut Criterion) {
    // Input generator: `batch_size` samples from a uniform distribution.
    fn random_floats(batch_size: usize) -> Vec<f64> {
        let distribution = Uniform::from(0.0..100.0);
        let samples = rand::thread_rng().sample_iter(&distribution);
        samples.take(batch_size).collect()
    }

    benchmark_encode(
        c,
        "float_encode_random",
        &LARGER_BATCH_SIZES,
        random_floats,
        delorean::encoders::float::encode,
    )
}
// The current integer encoder produces the following compression:
//
// values block size compression
// 10 25 20.00 bits/value
// 25 33 10.56 bits/value
// 50 65 10.40 bits/value
// 100 121 9.68 bits/value
// 250 281 8.99 bits/value
// 500 561 8.97 bits/value
// 750 833 8.88 bits/value
// 1000 1105 8.84 bits/value
// 5000 5425 8.68 bits/value
// 10000 10865 8.69 bits/value
// 50000 54361 8.69 bits/value
// 100000 108569 8.68 bits/value
//
/// Benchmarks `delorean::encoders::integer::encode` on `batch_size` random
/// integers drawn from `0..100`, for each size in `LARGER_BATCH_SIZES`.
fn integer_encode_random(c: &mut Criterion) {
    benchmark_encode(
        c,
        "integer_encode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Fetch the thread-local generator once rather than per value.
            let mut rng = rand::thread_rng();
            // `0..batch_size` yields exactly `batch_size` values, matching the
            // throughput that `benchmark_encode` reports; `1..batch_size`
            // produced one value too few.
            (0..batch_size).map(|_| rng.gen_range(0, 100)).collect()
        },
        delorean::encoders::integer::encode,
    )
}
// The current float encoder produces the following compression:
//
// values block size compression
// 10 91 72.8 bits/value
// 25 208 66.56 bits/value
// 50 411 65.76 bits/value
// 100 809 64.72 bits/value
// 250 2028 64.89 bits/value
// 500 4059 64.94 bits/value
// 750 6091 64.97 bits/value
// 1000 8122 64.97 bits/value
// 5000 40614 64.98 bits/value
// 10000 81223 64.97 bits/value
// 45000 365470 64.97 bits/value
//
/// Benchmarks `delorean::encoders::float::encode` on the first `batch_size`
/// values of `fixtures::CPU_F64_EXAMPLE_VALUES`, for each size in
/// `SMALLER_BATCH_SIZES`.
fn float_encode_cpu(c: &mut Criterion) {
benchmark_encode(
c,
"float_encode_cpu",
&SMALLER_BATCH_SIZES,
// Copies a prefix of the fixture; the slice panics if `batch_size` exceeds
// the fixture's length.
|batch_size| fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec(),
delorean::encoders::float::encode,
)
}
/// Benchmarks `delorean::encoders::float::decode` on the encoded form of the
/// first `batch_size` values of `fixtures::CPU_F64_EXAMPLE_VALUES`, for each
/// size in `SMALLER_BATCH_SIZES`.
fn float_decode_cpu(c: &mut Criterion) {
    // Input generator: encodes a prefix of the fixture and returns the number
    // of source values alongside the encoded bytes.
    fn encoded_cpu_values(batch_size: usize) -> (usize, Vec<u8>) {
        let values: Vec<f64> = fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec();
        let mut bytes = vec![];
        delorean::encoders::float::encode(&values, &mut bytes).unwrap();
        (values.len(), bytes)
    }

    benchmark_decode(
        c,
        "float_decode_cpu",
        &SMALLER_BATCH_SIZES,
        encoded_cpu_values,
        delorean::encoders::float::decode,
    )
}
/// Benchmarks `delorean::encoders::float::decode` on the encoded form of the
/// sequential values `1, 2, ..., batch_size`, for each size in
/// `LARGER_BATCH_SIZES`.
fn float_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "float_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Inclusive range so the input really holds `batch_size` values;
            // `1..batch_size` produced one value fewer than the benchmark id
            // advertises.
            let decoded: Vec<f64> = (1..=batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            delorean::encoders::float::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::float::decode,
    )
}
/// Benchmarks `delorean::encoders::integer::decode` on the encoded form of the
/// sequential values `1, 2, ..., batch_size`, for each size in
/// `LARGER_BATCH_SIZES`.
fn integer_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "integer_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Inclusive range so the input really holds `batch_size` values;
            // `1..batch_size` produced one value fewer than the benchmark id
            // advertises.
            let decoded: Vec<i64> = (1..=batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            delorean::encoders::integer::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::integer::decode,
    )
}
/// Benchmarks `delorean::encoders::timestamp::decode` on the encoded form of
/// the sequential values `1, 2, ..., batch_size`, for each size in
/// `LARGER_BATCH_SIZES`.
fn timestamp_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "timestamp_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Inclusive range so the input really holds `batch_size` values;
            // `1..batch_size` produced one value fewer than the benchmark id
            // advertises.
            let decoded: Vec<i64> = (1..=batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            delorean::encoders::timestamp::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::timestamp::decode,
    )
}
/// Benchmarks `delorean::encoders::float::decode` on the encoded form of
/// `batch_size` random floats drawn uniformly from `0.0..100.0`, for each size
/// in `LARGER_BATCH_SIZES`.
fn float_decode_random(c: &mut Criterion) {
    // Input generator: samples the values, encodes them, and returns the
    // number of source values alongside the encoded bytes.
    fn encoded_random_floats(batch_size: usize) -> (usize, Vec<u8>) {
        let distribution = Uniform::from(0.0..100.0);
        let samples = rand::thread_rng().sample_iter(&distribution);
        let values: Vec<f64> = samples.take(batch_size).collect();

        let mut bytes = vec![];
        delorean::encoders::float::encode(&values, &mut bytes).unwrap();
        (values.len(), bytes)
    }

    benchmark_decode(
        c,
        "float_decode_random",
        &LARGER_BATCH_SIZES,
        encoded_random_floats,
        delorean::encoders::float::decode,
    )
}
/// Benchmarks `delorean::encoders::integer::decode` on the encoded form of
/// `batch_size` random integers drawn from `0..100`, for each size in
/// `LARGER_BATCH_SIZES`.
fn integer_decode_random(c: &mut Criterion) {
    benchmark_decode(
        c,
        "integer_decode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Fetch the thread-local generator once rather than per value.
            let mut rng = rand::thread_rng();
            // `0..batch_size` yields exactly `batch_size` values;
            // `1..batch_size` produced one value fewer than the benchmark id
            // advertises.
            let decoded: Vec<i64> = (0..batch_size).map(|_| rng.gen_range(0, 100)).collect();
            let mut encoded = vec![];
            delorean::encoders::integer::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::integer::decode,
    )
}
// Registers every benchmark function in this file under the `benches` group
// and generates the benchmark entry point from it.
criterion_group!(
benches,
float_encode_sequential,
integer_encode_sequential,
timestamp_encode_sequential,
float_encode_random,
integer_encode_random,
float_encode_cpu,
float_decode_cpu,
float_decode_sequential,
integer_decode_sequential,
timestamp_decode_sequential,
float_decode_random,
integer_decode_random,
);
criterion_main!(benches);

45003
benches/fixtures.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
mod float; pub mod float;
mod integer; pub mod integer;
mod simple8b; mod simple8b;
mod timestamp; pub mod timestamp;

View File

@ -12,7 +12,7 @@ fn is_sentinel_u64(v: u64) -> bool {
v == SENTINEL v == SENTINEL
} }
/// encode_all encodes a vector of floats into dst. /// encode encodes a vector of floats into dst.
/// ///
/// The encoding used is equivalent to the encoding of floats in the Gorilla /// The encoding used is equivalent to the encoding of floats in the Gorilla
/// paper. Each subsequent value is compared to the previous and the XOR of the /// paper. Each subsequent value is compared to the previous and the XOR of the
@ -20,7 +20,7 @@ fn is_sentinel_u64(v: u64) -> bool {
/// representations based on those are stored. /// representations based on those are stored.
#[allow(dead_code)] #[allow(dead_code)]
#[allow(clippy::many_single_char_names)] #[allow(clippy::many_single_char_names)]
pub fn encode_all(src: &mut Vec<f64>, dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>> { pub fn encode(src: &[f64], dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>> {
dst.truncate(0); // reset buffer. dst.truncate(0); // reset buffer.
if src.is_empty() { if src.is_empty() {
return Ok(()); return Ok(());
@ -313,11 +313,11 @@ const BIT_MASK: [u64; 64] = [
0x7fff_ffff_ffff_ffff, 0x7fff_ffff_ffff_ffff,
]; ];
/// decode_all decodes a slice of bytes into a vector of floats. /// decode decodes a slice of bytes into a vector of floats.
#[allow(dead_code)] #[allow(dead_code)]
#[allow(clippy::many_single_char_names)] #[allow(clippy::many_single_char_names)]
#[allow(clippy::useless_let_if_seq)] #[allow(clippy::useless_let_if_seq)]
pub fn decode_all(src: &[u8], dst: &mut Vec<f64>) -> Result<(), Box<dyn Error>> { pub fn decode(src: &[u8], dst: &mut Vec<f64>) -> Result<(), Box<dyn Error>> {
if src.len() < 9 { if src.len() < 9 {
return Ok(()); return Ok(());
} }
@ -494,12 +494,12 @@ mod tests {
use crate::tests::approximately_equal; use crate::tests::approximately_equal;
#[test] #[test]
fn encode_all_no_values() { fn encode_no_values() {
let mut src: Vec<f64> = vec![]; let src: Vec<f64> = vec![];
let mut dst = vec![]; let mut dst = vec![];
// check for error // check for error
super::encode_all(&mut src, &mut dst).expect("failed to encode src"); super::encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values. // verify encoded no values.
let exp: Vec<u8> = Vec::new(); let exp: Vec<u8> = Vec::new();
@ -507,8 +507,8 @@ mod tests {
} }
#[test] #[test]
fn encode_all_special_values() { fn encode_special_values() {
let mut src: Vec<f64> = vec![ let src: Vec<f64> = vec![
100.0, 100.0,
222.12, 222.12,
f64::from_bits(0x7ff8000000000001), // Go representation of signalling NaN f64::from_bits(0x7ff8000000000001), // Go representation of signalling NaN
@ -526,10 +526,10 @@ mod tests {
let mut dst = vec![]; let mut dst = vec![];
// check for error // check for error
super::encode_all(&mut src, &mut dst).expect("failed to encode src"); super::encode(&src, &mut dst).expect("failed to encode src");
let mut got = vec![]; let mut got = vec![];
super::decode_all(&dst, &mut got).expect("failed to decode"); super::decode(&dst, &mut got).expect("failed to decode");
// Verify decoded values. // Verify decoded values.
assert_eq!(got.len(), src.len()); assert_eq!(got.len(), src.len());
@ -544,7 +544,7 @@ mod tests {
} }
#[test] #[test]
fn encode_all() { fn encode() {
struct Test { struct Test {
name: String, name: String,
input: Vec<f64>, input: Vec<f64>,
@ -1647,14 +1647,14 @@ mod tests {
]; ];
for test in tests { for test in tests {
let mut dst = vec![]; let mut dst = vec![];
let mut src = test.input.clone(); let src = test.input;
let exp = test.input;
super::encode_all(&mut src, &mut dst).expect("failed to encode"); super::encode(&src, &mut dst).expect("failed to encode");
let mut got = vec![]; let mut got = vec![];
super::decode_all(&dst, &mut got).expect("failed to decode"); super::decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp, "{}", test.name); assert_eq!(got, src, "{}", test.name);
} }
} }
} }

View File

@ -9,21 +9,21 @@ enum Encoding {
Rle = 2, Rle = 2,
} }
/// encode_all encodes a vector of signed integers into dst. /// encode encodes a vector of signed integers into dst.
/// ///
/// Deltas between the integers in the vector are first calculated, and these /// Deltas between the integers in the vector are first calculated, and these
/// deltas are then zig-zag encoded. The resulting zig-zag encoded deltas are /// deltas are then zig-zag encoded. The resulting zig-zag encoded deltas are
/// further compressed if possible, either via bit-packing using simple8b or by /// further compressed if possible, either via bit-packing using simple8b or by
/// run-length encoding the deltas if they're all the same. /// run-length encoding the deltas if they're all the same.
#[allow(dead_code)] #[allow(dead_code)]
pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> { pub fn encode<'a>(src: &[i64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
dst.truncate(0); // reset buffer. dst.truncate(0); // reset buffer.
if src.is_empty() { if src.is_empty() {
return Ok(()); return Ok(());
} }
let mut max: u64 = 0; let mut max: u64 = 0;
let mut deltas = i64_to_u64_vector(src); let mut deltas = i64_to_u64_vector(&src);
for i in (1..deltas.len()).rev() { for i in (1..deltas.len()).rev() {
deltas[i] = zig_zag_encode(deltas[i].wrapping_sub(deltas[i - 1]) as i64); deltas[i] = zig_zag_encode(deltas[i].wrapping_sub(deltas[i - 1]) as i64);
if deltas[i] > max { if deltas[i] > max {
@ -67,7 +67,7 @@ pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Bo
// first 4 high bits used for encoding type // first 4 high bits used for encoding type
dst.push((Encoding::Simple8b as u8) << 4); dst.push((Encoding::Simple8b as u8) << 4);
dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value
simple8b::encode_all(&deltas[1..], dst) simple8b::encode(&deltas[1..], dst)
} }
// zig_zag_encode converts a signed integer into an unsigned one by zig zagging // zig_zag_encode converts a signed integer into an unsigned one by zig zagging
@ -114,9 +114,9 @@ fn encode_rle(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
dst.truncate(n); dst.truncate(n);
} }
/// decode_all decodes a slice of bytes into a vector of signed integers. /// decode decodes a slice of bytes into a vector of signed integers.
#[allow(dead_code)] #[allow(dead_code)]
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> { pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
if src.is_empty() { if src.is_empty() {
return Ok(()); return Ok(());
} }
@ -198,7 +198,7 @@ fn decode_simple8b(src: &[u8], dst: &mut Vec<i64>) -> Result<(), Box<dyn Error>>
buf.copy_from_slice(&src[0..8]); buf.copy_from_slice(&src[0..8]);
dst.push(zig_zag_decode(u64::from_be_bytes(buf))); dst.push(zig_zag_decode(u64::from_be_bytes(buf)));
simple8b::decode_all(&src[8..], &mut res); simple8b::decode(&src[8..], &mut res);
// TODO(edd): fix this. It's copying, which is slowwwwwwwww. // TODO(edd): fix this. It's copying, which is slowwwwwwwww.
let mut next = dst[0]; let mut next = dst[0];
for v in &res { for v in &res {
@ -227,36 +227,36 @@ mod tests {
} }
#[test] #[test]
fn encode_all_no_values() { fn encode_no_values() {
let mut src: Vec<i64> = vec![]; let src: Vec<i64> = vec![];
let mut dst = vec![]; let mut dst = vec![];
// check for error // check for error
encode_all(&mut src, &mut dst).expect("failed to encode src"); encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values. // verify encoded no values.
assert_eq!(dst.to_vec().len(), 0); assert_eq!(dst.to_vec().len(), 0);
} }
#[test] #[test]
fn encode_all_uncompressed() { fn encode_uncompressed() {
let mut src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421]; let src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
let mut dst = vec![]; let mut dst = vec![];
let exp = src.clone(); let exp = src.clone();
encode_all(&mut src, &mut dst).expect("failed to encode"); encode(&src, &mut dst).expect("failed to encode");
// verify uncompressed encoding used // verify uncompressed encoding used
assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8); assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8);
let mut got = vec![]; let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode"); decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp); assert_eq!(got, exp);
} }
#[test] #[test]
fn encode_all_rle() { fn encode_rle() {
struct Test { struct Test {
name: String, name: String,
input: Vec<i64>, input: Vec<i64>,
@ -291,21 +291,21 @@ mod tests {
for test in tests { for test in tests {
let mut dst = vec![]; let mut dst = vec![];
let mut src = test.input.clone(); let src = test.input.clone();
let exp = test.input; let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode"); encode(&src, &mut dst).expect("failed to encode");
// verify RLE encoding used // verify RLE encoding used
assert_eq!(&dst[0] >> 4, Encoding::Rle as u8); assert_eq!(&dst[0] >> 4, Encoding::Rle as u8);
let mut got = vec![]; let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode"); decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp, "{}", test.name); assert_eq!(got, exp, "{}", test.name);
} }
} }
#[test] #[test]
fn encode_all_simple8b() { fn encode_simple8b() {
struct Test { struct Test {
name: String, name: String,
input: Vec<i64>, input: Vec<i64>,
@ -328,14 +328,14 @@ mod tests {
for test in tests { for test in tests {
let mut dst = vec![]; let mut dst = vec![];
let mut src = test.input.clone(); let src = test.input.clone();
let exp = test.input; let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode"); encode(&src, &mut dst).expect("failed to encode");
// verify Simple8b encoding used // verify Simple8b encoding used
assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8); assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8);
let mut got = vec![]; let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode"); decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp, "{}", test.name); assert_eq!(got, exp, "{}", test.name);
} }

View File

@ -25,9 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [
[1, 60], [1, 60],
]; ];
/// encode_all packs and binary encodes the provides slice of u64 values using /// encode packs and binary encodes the provides slice of u64 values using
/// simple8b into the provided vector. /// simple8b into the provided vector.
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> { pub fn encode<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
let mut i = 0; let mut i = 0;
'next_value: while i < src.len() { 'next_value: while i < src.len() {
// try to pack a run of 240 or 120 1s // try to pack a run of 240 or 120 1s
@ -79,9 +79,9 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn E
Ok(()) Ok(())
} }
/// decode_all decodes and unpacks the binary-encoded values stored in src into /// decode decodes and unpacks the binary-encoded values stored in src into
/// dst. /// dst.
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) { pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
let mut i = 0; let mut i = 0;
let mut j = 0; let mut j = 0;
let mut buf: [u8; 8] = [0; 8]; let mut buf: [u8; 8] = [0; 8];
@ -90,14 +90,13 @@ pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
dst.resize(j + 240, 0); // may need 240 capacity dst.resize(j + 240, 0); // may need 240 capacity
} }
buf.copy_from_slice(&src[i..i + 8]); buf.copy_from_slice(&src[i..i + 8]);
j += decode(u64::from_be_bytes(buf), &mut dst[j..]); j += decode_value(u64::from_be_bytes(buf), &mut dst[j..]);
i += 8; i += 8;
} }
dst.truncate(j); dst.truncate(j);
} }
#[allow(dead_code)] fn decode_value(v: u64, dst: &mut [u64]) -> usize {
pub fn decode(v: u64, dst: &mut [u64]) -> usize {
let sel = v >> S8B_BIT_SIZE as u64; let sel = v >> S8B_BIT_SIZE as u64;
let mut v = v; let mut v = v;
match sel { match sel {
@ -225,7 +224,7 @@ mod tests {
let mut dst = vec![]; let mut dst = vec![];
// check for error // check for error
encode_all(&src, &mut dst).expect("failed to encode src"); encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values. // verify encoded no values.
assert_eq!(dst.len(), src.len()) assert_eq!(dst.len(), src.len())
@ -237,9 +236,9 @@ mod tests {
let mut encoded = vec![]; let mut encoded = vec![];
let mut decoded = vec![]; let mut decoded = vec![];
encode_all(&src, &mut encoded).expect("failed to encode"); encode(&src, &mut encoded).expect("failed to encode");
assert_eq!(encoded.len(), 16); // verify vector is truncated. assert_eq!(encoded.len(), 16); // verify vector is truncated.
decode_all(&encoded, &mut decoded); decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes"); assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
} }
@ -249,9 +248,9 @@ mod tests {
let mut encoded = vec![]; let mut encoded = vec![];
let mut decoded = vec![]; let mut decoded = vec![];
encode_all(&src, &mut encoded).expect("failed to encode"); encode(&src, &mut encoded).expect("failed to encode");
assert_eq!(encoded.len(), 24); // verify vector is truncated. assert_eq!(encoded.len(), 24); // verify vector is truncated.
decode_all(&encoded, &mut decoded); decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes"); assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
} }
@ -260,12 +259,12 @@ mod tests {
let src = vec![7, 6, 2 << (61 - 1), 4, 3, 2, 1]; let src = vec![7, 6, 2 << (61 - 1), 4, 3, 2, 1];
let mut encoded = vec![]; let mut encoded = vec![];
let result = encode_all(&src, &mut encoded); let result = encode(&src, &mut encoded);
assert_eq!(result.unwrap_err().to_string(), "value out of bounds"); assert_eq!(result.unwrap_err().to_string(), "value out of bounds");
} }
#[test] #[test]
fn test_encode_all() { fn test_encode() {
struct Test { struct Test {
name: String, name: String,
// TODO(edd): no idea how to store the closure in the struct rather than the // TODO(edd): no idea how to store the closure in the struct rather than the
@ -338,9 +337,9 @@ mod tests {
for test in tests { for test in tests {
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&test.input, &mut encoded).expect("failed to encode"); encode(&test.input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded); decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), test.input, "{}", test.name); assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
} }
@ -350,27 +349,27 @@ mod tests {
let mut input = ones(240)(); let mut input = ones(240)();
input[120] = 5; input[120] = 5;
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode"); encode(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded); decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "120 ones"); assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
input = ones(240)(); input = ones(240)();
input[119] = 5; input[119] = 5;
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode"); encode(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded); decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "119 ones"); assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
input = ones(241)(); input = ones(241)();
input[239] = 5; input[239] = 5;
let mut encoded = vec![]; let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode"); encode(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![]; let mut decoded = vec![];
decode_all(&encoded, &mut decoded); decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "239 ones"); assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
} }

View File

@ -9,7 +9,7 @@ enum Encoding {
Rle = 2, Rle = 2,
} }
/// encode_all encodes a vector of signed integers into a slice of bytes. /// encode encodes a vector of signed integers into a slice of bytes.
/// ///
/// To maximise compression, the provided vector should be sorted in ascending /// To maximise compression, the provided vector should be sorted in ascending
/// order. First deltas between the integers are determined, then further encoding /// order. First deltas between the integers are determined, then further encoding
@ -17,7 +17,7 @@ enum Encoding {
/// encoded using RLE. If not, as long as the deltas are not bigger than simple8b::MAX_VALUE /// encoded using RLE. If not, as long as the deltas are not bigger than simple8b::MAX_VALUE
/// they can be encoded using simple8b. /// they can be encoded using simple8b.
#[allow(dead_code)] #[allow(dead_code)]
pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> { pub fn encode<'a>(src: &[i64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
dst.truncate(0); // reset buffer. dst.truncate(0); // reset buffer.
if src.is_empty() { if src.is_empty() {
return Ok(()); return Ok(());
@ -86,7 +86,7 @@ pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Bo
dst.push((Encoding::Simple8b as u8) << 4); dst.push((Encoding::Simple8b as u8) << 4);
dst[0] |= ((div as f64).log10()) as u8; // 4 low bits used for log10 divisor dst[0] |= ((div as f64).log10()) as u8; // 4 low bits used for log10 divisor
dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value
simple8b::encode_all(&deltas[1..], dst) simple8b::encode(&deltas[1..], dst)
} }
// i64_to_u64_vector converts a Vec<i64> to Vec<u64>. // i64_to_u64_vector converts a Vec<i64> to Vec<u64>.
@ -142,10 +142,10 @@ fn encode_rle(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
dst.truncate(n); dst.truncate(n);
} }
/// decode_all decodes a slice of bytes encoded using encode_all back into a /// decode decodes a slice of bytes encoded using encode back into a
/// vector of signed integers. /// vector of signed integers.
#[allow(dead_code)] #[allow(dead_code)]
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> { pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
if src.is_empty() { if src.is_empty() {
return Ok(()); return Ok(());
} }
@ -234,7 +234,7 @@ fn decode_simple8b(src: &[u8], dst: &mut Vec<i64>) -> Result<(), Box<dyn Error>>
buf.copy_from_slice(&src[1..9]); buf.copy_from_slice(&src[1..9]);
dst.push(i64::from_be_bytes(buf)); dst.push(i64::from_be_bytes(buf));
simple8b::decode_all(&src[9..], &mut res); simple8b::decode(&src[9..], &mut res);
let mut next = dst[dst.len() - 1]; let mut next = dst[dst.len() - 1];
if scaler > 1 { if scaler > 1 {
// TODO(edd): fix this. It's copying, which is slowwwwwwwww. // TODO(edd): fix this. It's copying, which is slowwwwwwwww.
@ -259,37 +259,37 @@ mod tests {
use super::*; use super::*;
#[test] #[test]
fn encode_all_no_values() { fn encode_no_values() {
let mut src: Vec<i64> = vec![]; let src: Vec<i64> = vec![];
let mut dst = vec![]; let mut dst = vec![];
// check for error // check for error
encode_all(&mut src, &mut dst).expect("failed to encode src"); encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values. // verify encoded no values.
assert_eq!(dst.len(), 0); assert_eq!(dst.len(), 0);
} }
#[test] #[test]
fn encode_all_uncompressed() { fn encode_uncompressed() {
let mut src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421]; let src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
let mut dst = vec![]; let mut dst = vec![];
let exp = src.clone(); let exp = src.clone();
encode_all(&mut src, &mut dst).expect("failed to encode"); encode(&src, &mut dst).expect("failed to encode");
// verify uncompressed encoding used // verify uncompressed encoding used
assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8); assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8);
let mut got = vec![]; let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode"); decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp); assert_eq!(got, exp);
} }
#[test] #[test]
fn encode_all_rle() { fn encode_rle() {
struct Test { struct Test {
name: String, name: String,
input: Vec<i64>, input: Vec<i64>,
@ -332,22 +332,22 @@ mod tests {
for test in tests { for test in tests {
let mut dst = vec![]; let mut dst = vec![];
let mut src = test.input.clone(); let src = test.input.clone();
let exp = test.input; let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode"); encode(&src, &mut dst).expect("failed to encode");
// verify RLE encoding used // verify RLE encoding used
assert_eq!(&dst[0] >> 4, Encoding::Rle as u8); assert_eq!(&dst[0] >> 4, Encoding::Rle as u8);
let mut got = vec![]; let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode"); decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp, "{}", test.name); assert_eq!(got, exp, "{}", test.name);
} }
} }
#[test] #[test]
fn encode_all_simple8b() { fn encode_simple8b() {
struct Test { struct Test {
name: String, name: String,
input: Vec<i64>, input: Vec<i64>,
@ -370,14 +370,14 @@ mod tests {
for test in tests { for test in tests {
let mut dst = vec![]; let mut dst = vec![];
let mut src = test.input.clone(); let src = test.input.clone();
let exp = test.input; let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode"); encode(&src, &mut dst).expect("failed to encode");
// verify Simple8b encoding used // verify Simple8b encoding used
assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8); assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8);
let mut got = vec![]; let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode"); decode(&dst, &mut got).expect("failed to decode");
// verify got same values back // verify got same values back
assert_eq!(got, exp, "{}", test.name); assert_eq!(got, exp, "{}", test.name);
} }