Merge pull request #21 from influxdata/er-encoder-bench

test: add encoder/decoder benchmarks
pull/24376/head
Edd Robinson 2020-02-28 13:05:00 +00:00 committed by GitHub
commit bfe17259f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 45804 additions and 294 deletions

563
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -39,10 +39,17 @@ croaring = "0.4.2"
http = "0.2.0"
serde_urlencoded = "0.6.1"
[build-dependencies]
tonic-build = "0.1.1"
[dev-dependencies]
criterion = "0.3"
reqwest = { version = "0.10.1", features = ["blocking"] }
assert_cmd = "0.12.0"
rand = "0.7.2"
[[bench]]
name = "encoders"
harness = false
[build-dependencies]
tonic-build = "0.1.1"
rand = "0.7.2"

352
benches/encoders.rs Normal file
View File

@ -0,0 +1,352 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rand::{distributions::Uniform, Rng};
use std::convert::TryFrom;
use std::mem;
mod fixtures;
// Batch sizes (number of values requested per block) used by the benchmarks
// whose input is generated on the fly (sequential and random data).
const LARGER_BATCH_SIZES: [usize; 12] = [
10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 50_000, 100_000,
];
// Batch sizes used by the benchmarks that slice their input out of
// `fixtures::CPU_F64_EXAMPLE_VALUES`.
// NOTE(review): presumably capped at 45_000 because that is how many values
// the fixture holds -- confirm against benches/fixtures.rs.
const SMALLER_BATCH_SIZES: [usize; 11] =
[10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 45_000];
// Signature of an encoder under test: reads the values in `src` and writes
// the encoded bytes into `dst`.
type EncodeFn<T> = fn(src: &[T], dst: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>>;
// Signature of a decoder under test: reads the encoded bytes in `src` and
// writes the decoded values into `dst`.
type DecodeFn<T> = fn(src: &[u8], dst: &mut Vec<T>) -> Result<(), Box<dyn std::error::Error>>;
/// Benchmarks `encode` on sequential input.
///
/// For every size `n` in `batch_sizes` the encoder is fed the values
/// `1, 2, ..., n` converted into `T`. The range is inclusive so that exactly
/// `n` values are encoded, which is what the benchmark ID and the throughput
/// reported by `benchmark_encode` assume (a half-open `1..n` range would
/// silently encode one value fewer).
fn benchmark_encode_sequential<T: From<i32>>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    encode: EncodeFn<T>,
) {
    benchmark_encode(
        c,
        benchmark_group_name,
        batch_sizes,
        |batch_size| (1..=batch_size).map(convert_from_usize).collect(),
        encode,
    );
}
/// Runs `encode` under Criterion once for every size in `batch_sizes`.
///
/// The input for each size is produced by `decoded_value_generation` before
/// the timed loop starts, so only the `encode` call itself is measured. The
/// reported throughput is `batch_size * size_of::<T>()` bytes, i.e. the size
/// of the unencoded input.
fn benchmark_encode<T>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    decoded_value_generation: fn(batch_size: usize) -> Vec<T>,
    encode: EncodeFn<T>,
) {
    let mut group = c.benchmark_group(benchmark_group_name);
    for &batch_size in batch_sizes {
        let input_bytes = batch_size * mem::size_of::<T>();
        group.throughput(Throughput::Bytes(u64::try_from(input_bytes).unwrap()));

        let id = BenchmarkId::from_parameter(batch_size);
        group.bench_with_input(id, &batch_size, |bencher, &size| {
            let values = decoded_value_generation(size);
            // One output buffer is shared by every iteration.
            let mut output = Vec::new();
            bencher.iter(|| encode(&values, &mut output).unwrap());
        });
    }
    group.finish();
}
/// Runs `decode` under Criterion once for every size in `batch_sizes`.
///
/// `input_value_generation` returns the number of values that were encoded
/// together with the encoded bytes. The reported throughput is the length of
/// the encoded bytes, and the output vector is pre-sized to hold the decoded
/// values so the timed loop does not have to grow it.
fn benchmark_decode<T>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    input_value_generation: fn(batch_size: usize) -> (usize, Vec<u8>),
    decode: DecodeFn<T>,
) {
    let mut group = c.benchmark_group(benchmark_group_name);
    for &batch_size in batch_sizes {
        let (value_count, bytes) = input_value_generation(batch_size);
        let encoded_len = u64::try_from(bytes.len()).unwrap();
        group.throughput(Throughput::Bytes(encoded_len));

        let id = BenchmarkId::from_parameter(batch_size);
        group.bench_with_input(id, &value_count, |bencher, &capacity| {
            let mut output = Vec::with_capacity(capacity);
            bencher.iter(|| {
                // Empty the buffer (keeping its allocation) before each run.
                output.clear();
                decode(&bytes, &mut output).unwrap();
            });
        });
    }
    group.finish();
}
/// Converts `a` into any type that can be built from an `i32`.
///
/// # Panics
///
/// Panics if `a` does not fit in an `i32`.
fn convert_from_usize<T: From<i32>>(a: usize) -> T {
    let narrowed = i32::try_from(a).unwrap();
    T::from(narrowed)
}
// The current float encoder produces the following compression for
// sequential values:
//
// values block size compression
// 10 33 26.4 bits/value
// 25 52 16.64 bits/value
// 50 78 12.48 bits/value
// 100 129 10.32 bits/value
// 250 290 9.28 bits/value
// 500 584 9.34 bits/value
// 750 878 9.36 bits/value
// 1000 1221 9.76 bits/value
// 5000 7013 11.22 bits/value
// 10000 15145 12.11 bits/value
// 50000 90090 14.41 bits/value
// 100000 192481 15.39 bits/value
//
fn float_encode_sequential(c: &mut Criterion) {
benchmark_encode_sequential(
c,
"float_encode_sequential",
&LARGER_BATCH_SIZES,
delorean::encoders::float::encode,
);
}
// The current integer encoder produces the following compression. Note, since
// a sequential range of values can be encoded using RLE the compression
// statistics are not very interesting.
//
// values block size compression
// 10 11 8.80 bits/value
// 25 11 3.52 bits/value
// 50 11 1.76 bits/value
// 100 11 0.88 bits/value
// 250 12 0.38 bits/value
// 500 12 0.19 bits/value
// 750 12 0.12 bits/value
// 1000 12 0.09 bits/value
// 5000 12 0.01 bits/value
// 10000 12 0.00 bits/value
// 50000 13 0.00 bits/value
// 100000 13 0.00 bits/value
//
fn integer_encode_sequential(c: &mut Criterion) {
benchmark_encode_sequential(
c,
"integer_encode_sequential",
&LARGER_BATCH_SIZES,
delorean::encoders::integer::encode,
);
}
fn timestamp_encode_sequential(c: &mut Criterion) {
benchmark_encode_sequential(
c,
"timestamp_encode_sequential",
&LARGER_BATCH_SIZES,
delorean::encoders::timestamp::encode,
);
}
// The current float encoder produces the following compression for random
// values drawn uniformly from [0, 100):
//
// values block size compression
// 10 32 25.6 bits/value
// 25 76 24.32 bits/value
// 50 86 13.76 bits/value
// 100 167 13.36 bits/value
// 250 388 12.41 bits/value
// 500 1165 18.64 bits/value
// 750 1769 18.86 bits/value
// 1000 2366 18.92 bits/value
// 5000 11785 18.85 bits/value
// 10000 23559 18.84 bits/value
// 50000 117572 18.81 bits/value
// 100000 235166 18.81 bits/value
//
/// Benchmarks the float encoder on random input for every size in
/// `LARGER_BATCH_SIZES`.
fn float_encode_random(c: &mut Criterion) {
    /// Draws `batch_size` floats from a uniform distribution over `0.0..100.0`.
    fn random_floats(batch_size: usize) -> Vec<f64> {
        let distribution = Uniform::from(0.0..100.0);
        rand::thread_rng()
            .sample_iter(&distribution)
            .take(batch_size)
            .collect()
    }

    benchmark_encode(
        c,
        "float_encode_random",
        &LARGER_BATCH_SIZES,
        random_floats,
        delorean::encoders::float::encode,
    );
}
// The current integer encoder produces the following compression for random
// values in [0, 100):
//
// values block size compression
// 10 25 20.00 bits/value
// 25 33 10.56 bits/value
// 50 65 10.40 bits/value
// 100 121 9.68 bits/value
// 250 281 8.99 bits/value
// 500 561 8.97 bits/value
// 750 833 8.88 bits/value
// 1000 1105 8.84 bits/value
// 5000 5425 8.68 bits/value
// 10000 10865 8.69 bits/value
// 50000 54361 8.69 bits/value
// 100000 108569 8.68 bits/value
//
/// Benchmarks the integer encoder on random input for every size in
/// `LARGER_BATCH_SIZES`.
///
/// Each batch holds exactly `batch_size` values produced by
/// `gen_range(0, 100)`. The range driving the generator starts at 0 (rather
/// than 1) so that the number of values matches the benchmark ID and the
/// reported throughput, in line with `float_encode_random`.
fn integer_encode_random(c: &mut Criterion) {
    benchmark_encode(
        c,
        "integer_encode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Fetch the thread-local generator once instead of per value.
            let mut rng = rand::thread_rng();
            (0..batch_size).map(|_| rng.gen_range(0, 100)).collect()
        },
        delorean::encoders::integer::encode,
    )
}
// The current float encoder produces the following compression for the CPU
// fixture values (`fixtures::CPU_F64_EXAMPLE_VALUES`):
//
// values block size compression
// 10 91 72.8 bits/value
// 25 208 66.56 bits/value
// 50 411 65.76 bits/value
// 100 809 64.72 bits/value
// 250 2028 64.89 bits/value
// 500 4059 64.94 bits/value
// 750 6091 64.97 bits/value
// 1000 8122 64.97 bits/value
// 5000 40614 64.98 bits/value
// 10000 81223 64.97 bits/value
// 45000 365470 64.97 bits/value
//
/// Benchmarks the float encoder on the CPU fixture data for every size in
/// `SMALLER_BATCH_SIZES`.
fn float_encode_cpu(c: &mut Criterion) {
    /// Copies the first `batch_size` fixture values into a fresh vector.
    fn cpu_values(batch_size: usize) -> Vec<f64> {
        let prefix = &fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size];
        prefix.to_vec()
    }

    benchmark_encode(
        c,
        "float_encode_cpu",
        &SMALLER_BATCH_SIZES,
        cpu_values,
        delorean::encoders::float::encode,
    );
}
/// Benchmarks the float decoder on blocks encoded from the CPU fixture data
/// for every size in `SMALLER_BATCH_SIZES`.
fn float_decode_cpu(c: &mut Criterion) {
    /// Encodes the first `batch_size` fixture values and returns the number
    /// of values together with the encoded block.
    fn encoded_cpu_block(batch_size: usize) -> (usize, Vec<u8>) {
        let values: Vec<f64> = fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec();
        let mut block = Vec::new();
        delorean::encoders::float::encode(&values, &mut block).unwrap();
        (values.len(), block)
    }

    benchmark_decode(
        c,
        "float_decode_cpu",
        &SMALLER_BATCH_SIZES,
        encoded_cpu_block,
        delorean::encoders::float::decode,
    );
}
/// Benchmarks the float decoder on blocks encoded from sequential values for
/// every size in `LARGER_BATCH_SIZES`.
///
/// Each block is built from the values `1, 2, ..., batch_size`. The range is
/// inclusive so the block holds exactly `batch_size` values, matching the
/// benchmark ID (a half-open `1..batch_size` range would hold one fewer).
fn float_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "float_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<f64> = (1..=batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            delorean::encoders::float::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::float::decode,
    )
}
/// Benchmarks the integer decoder on blocks encoded from sequential values
/// for every size in `LARGER_BATCH_SIZES`.
///
/// Each block is built from the values `1, 2, ..., batch_size`. The range is
/// inclusive so the block holds exactly `batch_size` values, matching the
/// benchmark ID (a half-open `1..batch_size` range would hold one fewer).
fn integer_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "integer_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<i64> = (1..=batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            delorean::encoders::integer::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::integer::decode,
    )
}
/// Benchmarks the timestamp decoder on blocks encoded from sequential values
/// for every size in `LARGER_BATCH_SIZES`.
///
/// Each block is built from the values `1, 2, ..., batch_size`. The range is
/// inclusive so the block holds exactly `batch_size` values, matching the
/// benchmark ID (a half-open `1..batch_size` range would hold one fewer).
fn timestamp_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "timestamp_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<i64> = (1..=batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            delorean::encoders::timestamp::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::timestamp::decode,
    )
}
/// Benchmarks the float decoder on blocks encoded from random values for
/// every size in `LARGER_BATCH_SIZES`.
fn float_decode_random(c: &mut Criterion) {
    /// Encodes `batch_size` floats drawn from a uniform distribution over
    /// `0.0..100.0` and returns the number of values together with the
    /// encoded block.
    fn encoded_random_floats(batch_size: usize) -> (usize, Vec<u8>) {
        let distribution = Uniform::from(0.0..100.0);
        let values: Vec<f64> = rand::thread_rng()
            .sample_iter(&distribution)
            .take(batch_size)
            .collect();
        let mut block = Vec::new();
        delorean::encoders::float::encode(&values, &mut block).unwrap();
        (values.len(), block)
    }

    benchmark_decode(
        c,
        "float_decode_random",
        &LARGER_BATCH_SIZES,
        encoded_random_floats,
        delorean::encoders::float::decode,
    );
}
/// Benchmarks the integer decoder on blocks encoded from random values for
/// every size in `LARGER_BATCH_SIZES`.
///
/// Each block is built from exactly `batch_size` values produced by
/// `gen_range(0, 100)`. The range driving the generator starts at 0 (rather
/// than 1) so the number of values matches the benchmark ID, in line with
/// `float_decode_random`.
fn integer_decode_random(c: &mut Criterion) {
    benchmark_decode(
        c,
        "integer_decode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            // Fetch the thread-local generator once instead of per value.
            let mut rng = rand::thread_rng();
            let decoded: Vec<i64> = (0..batch_size).map(|_| rng.gen_range(0, 100)).collect();
            let mut encoded = vec![];
            delorean::encoders::integer::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        delorean::encoders::integer::decode,
    )
}
// Collects every benchmark function defined in this file into a single
// Criterion group named `benches`, then hands that group to `criterion_main!`
// as the entry point of this bench target.
criterion_group!(
benches,
float_encode_sequential,
integer_encode_sequential,
timestamp_encode_sequential,
float_encode_random,
integer_encode_random,
float_encode_cpu,
float_decode_cpu,
float_decode_sequential,
integer_decode_sequential,
timestamp_decode_sequential,
float_decode_random,
integer_decode_random,
);
criterion_main!(benches);

45003
benches/fixtures.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
mod float;
mod integer;
pub mod float;
pub mod integer;
mod simple8b;
mod timestamp;
pub mod timestamp;

View File

@ -12,7 +12,7 @@ fn is_sentinel_u64(v: u64) -> bool {
v == SENTINEL
}
/// encode_all encodes a vector of floats into dst.
/// encode encodes a vector of floats into dst.
///
/// The encoding used is equivalent to the encoding of floats in the Gorilla
/// paper. Each subsequent value is compared to the previous and the XOR of the
@ -20,7 +20,7 @@ fn is_sentinel_u64(v: u64) -> bool {
/// representations based on those are stored.
#[allow(dead_code)]
#[allow(clippy::many_single_char_names)]
pub fn encode_all(src: &mut Vec<f64>, dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>> {
pub fn encode(src: &[f64], dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>> {
dst.truncate(0); // reset buffer.
if src.is_empty() {
return Ok(());
@ -313,11 +313,11 @@ const BIT_MASK: [u64; 64] = [
0x7fff_ffff_ffff_ffff,
];
/// decode_all decodes a slice of bytes into a vector of floats.
/// decode decodes a slice of bytes into a vector of floats.
#[allow(dead_code)]
#[allow(clippy::many_single_char_names)]
#[allow(clippy::useless_let_if_seq)]
pub fn decode_all(src: &[u8], dst: &mut Vec<f64>) -> Result<(), Box<dyn Error>> {
pub fn decode(src: &[u8], dst: &mut Vec<f64>) -> Result<(), Box<dyn Error>> {
if src.len() < 9 {
return Ok(());
}
@ -494,12 +494,12 @@ mod tests {
use crate::tests::approximately_equal;
#[test]
fn encode_all_no_values() {
let mut src: Vec<f64> = vec![];
fn encode_no_values() {
let src: Vec<f64> = vec![];
let mut dst = vec![];
// check for error
super::encode_all(&mut src, &mut dst).expect("failed to encode src");
super::encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values.
let exp: Vec<u8> = Vec::new();
@ -507,8 +507,8 @@ mod tests {
}
#[test]
fn encode_all_special_values() {
let mut src: Vec<f64> = vec![
fn encode_special_values() {
let src: Vec<f64> = vec![
100.0,
222.12,
f64::from_bits(0x7ff8000000000001), // Go representation of signalling NaN
@ -526,10 +526,10 @@ mod tests {
let mut dst = vec![];
// check for error
super::encode_all(&mut src, &mut dst).expect("failed to encode src");
super::encode(&src, &mut dst).expect("failed to encode src");
let mut got = vec![];
super::decode_all(&dst, &mut got).expect("failed to decode");
super::decode(&dst, &mut got).expect("failed to decode");
// Verify decoded values.
assert_eq!(got.len(), src.len());
@ -544,7 +544,7 @@ mod tests {
}
#[test]
fn encode_all() {
fn encode() {
struct Test {
name: String,
input: Vec<f64>,
@ -1647,14 +1647,14 @@ mod tests {
];
for test in tests {
let mut dst = vec![];
let mut src = test.input.clone();
let exp = test.input;
super::encode_all(&mut src, &mut dst).expect("failed to encode");
let src = test.input;
super::encode(&src, &mut dst).expect("failed to encode");
let mut got = vec![];
super::decode_all(&dst, &mut got).expect("failed to decode");
super::decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp, "{}", test.name);
assert_eq!(got, src, "{}", test.name);
}
}
}

View File

@ -9,21 +9,21 @@ enum Encoding {
Rle = 2,
}
/// encode_all encodes a vector of signed integers into dst.
/// encode encodes a vector of signed integers into dst.
///
/// Deltas between the integers in the vector are first calculated, and these
/// deltas are then zig-zag encoded. The resulting zig-zag encoded deltas are
/// further compressed if possible, either via bit-packing using simple8b or by
/// run-length encoding the deltas if they're all the same.
#[allow(dead_code)]
pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
pub fn encode<'a>(src: &[i64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
dst.truncate(0); // reset buffer.
if src.is_empty() {
return Ok(());
}
let mut max: u64 = 0;
let mut deltas = i64_to_u64_vector(src);
let mut deltas = i64_to_u64_vector(&src);
for i in (1..deltas.len()).rev() {
deltas[i] = zig_zag_encode(deltas[i].wrapping_sub(deltas[i - 1]) as i64);
if deltas[i] > max {
@ -67,7 +67,7 @@ pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Bo
// first 4 high bits used for encoding type
dst.push((Encoding::Simple8b as u8) << 4);
dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value
simple8b::encode_all(&deltas[1..], dst)
simple8b::encode(&deltas[1..], dst)
}
// zig_zag_encode converts a signed integer into an unsigned one by zig zagging
@ -114,9 +114,9 @@ fn encode_rle(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
dst.truncate(n);
}
/// decode_all decodes a slice of bytes into a vector of signed integers.
/// decode decodes a slice of bytes into a vector of signed integers.
#[allow(dead_code)]
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
if src.is_empty() {
return Ok(());
}
@ -198,7 +198,7 @@ fn decode_simple8b(src: &[u8], dst: &mut Vec<i64>) -> Result<(), Box<dyn Error>>
buf.copy_from_slice(&src[0..8]);
dst.push(zig_zag_decode(u64::from_be_bytes(buf)));
simple8b::decode_all(&src[8..], &mut res);
simple8b::decode(&src[8..], &mut res);
// TODO(edd): fix this. It's copying, which is slowwwwwwwww.
let mut next = dst[0];
for v in &res {
@ -227,36 +227,36 @@ mod tests {
}
#[test]
fn encode_all_no_values() {
let mut src: Vec<i64> = vec![];
fn encode_no_values() {
let src: Vec<i64> = vec![];
let mut dst = vec![];
// check for error
encode_all(&mut src, &mut dst).expect("failed to encode src");
encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values.
assert_eq!(dst.to_vec().len(), 0);
}
#[test]
fn encode_all_uncompressed() {
let mut src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
fn encode_uncompressed() {
let src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
let mut dst = vec![];
let exp = src.clone();
encode_all(&mut src, &mut dst).expect("failed to encode");
encode(&src, &mut dst).expect("failed to encode");
// verify uncompressed encoding used
assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8);
let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode");
decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp);
}
#[test]
fn encode_all_rle() {
fn encode_rle() {
struct Test {
name: String,
input: Vec<i64>,
@ -291,21 +291,21 @@ mod tests {
for test in tests {
let mut dst = vec![];
let mut src = test.input.clone();
let src = test.input.clone();
let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode");
encode(&src, &mut dst).expect("failed to encode");
// verify RLE encoding used
assert_eq!(&dst[0] >> 4, Encoding::Rle as u8);
let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode");
decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp, "{}", test.name);
}
}
#[test]
fn encode_all_simple8b() {
fn encode_simple8b() {
struct Test {
name: String,
input: Vec<i64>,
@ -328,14 +328,14 @@ mod tests {
for test in tests {
let mut dst = vec![];
let mut src = test.input.clone();
let src = test.input.clone();
let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode");
encode(&src, &mut dst).expect("failed to encode");
// verify Simple8b encoding used
assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8);
let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode");
decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp, "{}", test.name);
}

View File

@ -25,9 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [
[1, 60],
];
/// encode_all packs and binary encodes the provides slice of u64 values using
/// encode packs and binary encodes the provided slice of u64 values using
/// simple8b into the provided vector.
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
pub fn encode<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
let mut i = 0;
'next_value: while i < src.len() {
// try to pack a run of 240 or 120 1s
@ -79,9 +79,9 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn E
Ok(())
}
/// decode_all decodes and unpacks the binary-encoded values stored in src into
/// decode decodes and unpacks the binary-encoded values stored in src into
/// dst.
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
let mut i = 0;
let mut j = 0;
let mut buf: [u8; 8] = [0; 8];
@ -90,14 +90,13 @@ pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
dst.resize(j + 240, 0); // may need 240 capacity
}
buf.copy_from_slice(&src[i..i + 8]);
j += decode(u64::from_be_bytes(buf), &mut dst[j..]);
j += decode_value(u64::from_be_bytes(buf), &mut dst[j..]);
i += 8;
}
dst.truncate(j);
}
#[allow(dead_code)]
pub fn decode(v: u64, dst: &mut [u64]) -> usize {
fn decode_value(v: u64, dst: &mut [u64]) -> usize {
let sel = v >> S8B_BIT_SIZE as u64;
let mut v = v;
match sel {
@ -225,7 +224,7 @@ mod tests {
let mut dst = vec![];
// check for error
encode_all(&src, &mut dst).expect("failed to encode src");
encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values.
assert_eq!(dst.len(), src.len())
@ -237,9 +236,9 @@ mod tests {
let mut encoded = vec![];
let mut decoded = vec![];
encode_all(&src, &mut encoded).expect("failed to encode");
encode(&src, &mut encoded).expect("failed to encode");
assert_eq!(encoded.len(), 16); // verify vector is truncated.
decode_all(&encoded, &mut decoded);
decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
}
@ -249,9 +248,9 @@ mod tests {
let mut encoded = vec![];
let mut decoded = vec![];
encode_all(&src, &mut encoded).expect("failed to encode");
encode(&src, &mut encoded).expect("failed to encode");
assert_eq!(encoded.len(), 24); // verify vector is truncated.
decode_all(&encoded, &mut decoded);
decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
}
@ -260,12 +259,12 @@ mod tests {
let src = vec![7, 6, 2 << (61 - 1), 4, 3, 2, 1];
let mut encoded = vec![];
let result = encode_all(&src, &mut encoded);
let result = encode(&src, &mut encoded);
assert_eq!(result.unwrap_err().to_string(), "value out of bounds");
}
#[test]
fn test_encode_all() {
fn test_encode() {
struct Test {
name: String,
// TODO(edd): no idea how to store the closure in the struct rather than the
@ -338,9 +337,9 @@ mod tests {
for test in tests {
let mut encoded = vec![];
encode_all(&test.input, &mut encoded).expect("failed to encode");
encode(&test.input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded);
decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
}
@ -350,27 +349,27 @@ mod tests {
let mut input = ones(240)();
input[120] = 5;
let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode");
encode(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded);
decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
input = ones(240)();
input[119] = 5;
let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode");
encode(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded);
decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
input = ones(241)();
input[239] = 5;
let mut encoded = vec![];
encode_all(&input, &mut encoded).expect("failed to encode");
encode(&input, &mut encoded).expect("failed to encode");
let mut decoded = vec![];
decode_all(&encoded, &mut decoded);
decode(&encoded, &mut decoded);
assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
}

View File

@ -9,7 +9,7 @@ enum Encoding {
Rle = 2,
}
/// encode_all encodes a vector of signed integers into a slice of bytes.
/// encode encodes a vector of signed integers into a slice of bytes.
///
/// To maximise compression, the provided vector should be sorted in ascending
/// order. First deltas between the integers are determined, then further encoding
@ -17,7 +17,7 @@ enum Encoding {
/// encoded using RLE. If not, as long as the deltas are not bigger than simple8b::MAX_VALUE
/// they can be encoded using simple8b.
#[allow(dead_code)]
pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
pub fn encode<'a>(src: &[i64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
dst.truncate(0); // reset buffer.
if src.is_empty() {
return Ok(());
@ -86,7 +86,7 @@ pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Bo
dst.push((Encoding::Simple8b as u8) << 4);
dst[0] |= ((div as f64).log10()) as u8; // 4 low bits used for log10 divisor
dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value
simple8b::encode_all(&deltas[1..], dst)
simple8b::encode(&deltas[1..], dst)
}
// i64_to_u64_vector converts a Vec<i64> to Vec<u64>.
@ -142,10 +142,10 @@ fn encode_rle(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
dst.truncate(n);
}
/// decode_all decodes a slice of bytes encoded using encode_all back into a
/// decode decodes a slice of bytes encoded using encode back into a
/// vector of signed integers.
#[allow(dead_code)]
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
if src.is_empty() {
return Ok(());
}
@ -234,7 +234,7 @@ fn decode_simple8b(src: &[u8], dst: &mut Vec<i64>) -> Result<(), Box<dyn Error>>
buf.copy_from_slice(&src[1..9]);
dst.push(i64::from_be_bytes(buf));
simple8b::decode_all(&src[9..], &mut res);
simple8b::decode(&src[9..], &mut res);
let mut next = dst[dst.len() - 1];
if scaler > 1 {
// TODO(edd): fix this. It's copying, which is slowwwwwwwww.
@ -259,37 +259,37 @@ mod tests {
use super::*;
#[test]
fn encode_all_no_values() {
let mut src: Vec<i64> = vec![];
fn encode_no_values() {
let src: Vec<i64> = vec![];
let mut dst = vec![];
// check for error
encode_all(&mut src, &mut dst).expect("failed to encode src");
encode(&src, &mut dst).expect("failed to encode src");
// verify encoded no values.
assert_eq!(dst.len(), 0);
}
#[test]
fn encode_all_uncompressed() {
let mut src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
fn encode_uncompressed() {
let src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
let mut dst = vec![];
let exp = src.clone();
encode_all(&mut src, &mut dst).expect("failed to encode");
encode(&src, &mut dst).expect("failed to encode");
// verify uncompressed encoding used
assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8);
let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode");
decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp);
}
#[test]
fn encode_all_rle() {
fn encode_rle() {
struct Test {
name: String,
input: Vec<i64>,
@ -332,22 +332,22 @@ mod tests {
for test in tests {
let mut dst = vec![];
let mut src = test.input.clone();
let src = test.input.clone();
let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode");
encode(&src, &mut dst).expect("failed to encode");
// verify RLE encoding used
assert_eq!(&dst[0] >> 4, Encoding::Rle as u8);
let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode");
decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp, "{}", test.name);
}
}
#[test]
fn encode_all_simple8b() {
fn encode_simple8b() {
struct Test {
name: String,
input: Vec<i64>,
@ -370,14 +370,14 @@ mod tests {
for test in tests {
let mut dst = vec![];
let mut src = test.input.clone();
let src = test.input.clone();
let exp = test.input;
encode_all(&mut src, &mut dst).expect("failed to encode");
encode(&src, &mut dst).expect("failed to encode");
// verify Simple8b encoding used
assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8);
let mut got = vec![];
decode_all(&dst, &mut got).expect("failed to decode");
decode(&dst, &mut got).expect("failed to decode");
// verify got same values back
assert_eq!(got, exp, "{}", test.name);
}