Merge pull request #21 from influxdata/er-encoder-bench
test: add encoder/decoder benchmarks
pull/24376/head
commit
bfe17259f1
File diff suppressed because it is too large
Load Diff
11
Cargo.toml
11
Cargo.toml
|
@ -39,10 +39,17 @@ croaring = "0.4.2"
|
|||
http = "0.2.0"
|
||||
serde_urlencoded = "0.6.1"
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = "0.1.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
reqwest = { version = "0.10.1", features = ["blocking"] }
|
||||
assert_cmd = "0.12.0"
|
||||
rand = "0.7.2"
|
||||
|
||||
[[bench]]
|
||||
name = "encoders"
|
||||
harness = false
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = "0.1.1"
|
||||
rand = "0.7.2"
|
||||
|
|
|
@ -0,0 +1,352 @@
|
|||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use rand::{distributions::Uniform, Rng};
|
||||
|
||||
use std::convert::TryFrom;
|
||||
use std::mem;
|
||||
|
||||
mod fixtures;
|
||||
|
||||
// Batch sizes (number of values requested per batch) used by the sequential
// and random benchmarks in this file.
const LARGER_BATCH_SIZES: [usize; 12] = [
|
||||
10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 50_000, 100_000,
|
||||
];
|
||||
|
||||
// Batch sizes used by the benchmarks that slice `fixtures::CPU_F64_EXAMPLE_VALUES`.
// NOTE(review): the 45_000 cap is presumably bounded by the number of values
// in that fixture -- confirm against `fixtures`.
const SMALLER_BATCH_SIZES: [usize; 11] =
|
||||
[10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 45_000];
|
||||
|
||||
// Signature of an encoder under test: reads values from `src` and writes the
// encoded bytes into `dst`.
type EncodeFn<T> = fn(src: &[T], dst: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>>;
|
||||
// Signature of a decoder under test: reads encoded bytes from `src` and writes
// the decoded values into `dst`.
type DecodeFn<T> = fn(src: &[u8], dst: &mut Vec<T>) -> Result<(), Box<dyn std::error::Error>>;
|
||||
|
||||
fn benchmark_encode_sequential<T: From<i32>>(
|
||||
c: &mut Criterion,
|
||||
benchmark_group_name: &str,
|
||||
batch_sizes: &[usize],
|
||||
encode: EncodeFn<T>,
|
||||
) {
|
||||
benchmark_encode(
|
||||
c,
|
||||
benchmark_group_name,
|
||||
batch_sizes,
|
||||
|batch_size| (1..batch_size).map(convert_from_usize).collect(),
|
||||
encode,
|
||||
);
|
||||
}
|
||||
|
||||
fn benchmark_encode<T>(
|
||||
c: &mut Criterion,
|
||||
benchmark_group_name: &str,
|
||||
batch_sizes: &[usize],
|
||||
decoded_value_generation: fn(batch_size: usize) -> Vec<T>,
|
||||
encode: EncodeFn<T>,
|
||||
) {
|
||||
let mut group = c.benchmark_group(benchmark_group_name);
|
||||
for &batch_size in batch_sizes {
|
||||
group.throughput(Throughput::Bytes(
|
||||
u64::try_from(batch_size * mem::size_of::<T>()).unwrap(),
|
||||
));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(batch_size),
|
||||
&batch_size,
|
||||
|b, &batch_size| {
|
||||
let decoded = decoded_value_generation(batch_size);
|
||||
let mut encoded = vec![];
|
||||
b.iter(|| {
|
||||
encode(&decoded, &mut encoded).unwrap();
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn benchmark_decode<T>(
|
||||
c: &mut Criterion,
|
||||
benchmark_group_name: &str,
|
||||
batch_sizes: &[usize],
|
||||
input_value_generation: fn(batch_size: usize) -> (usize, Vec<u8>),
|
||||
decode: DecodeFn<T>,
|
||||
) {
|
||||
let mut group = c.benchmark_group(benchmark_group_name);
|
||||
for &batch_size in batch_sizes {
|
||||
let (decoded_len, encoded) = input_value_generation(batch_size);
|
||||
group.throughput(Throughput::Bytes(u64::try_from(encoded.len()).unwrap()));
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(batch_size),
|
||||
&decoded_len,
|
||||
|b, &decoded_len| {
|
||||
let mut decoded_mut = Vec::with_capacity(decoded_len);
|
||||
b.iter(|| {
|
||||
decoded_mut.truncate(0);
|
||||
decode(&encoded, &mut decoded_mut).unwrap();
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
/// Converts a `usize` into any type that can be built from an `i32`.
///
/// The value is first narrowed to `i32` and then widened into `T` via
/// `From<i32>`.
///
/// # Panics
///
/// Panics if `a` does not fit in an `i32`.
fn convert_from_usize<T: From<i32>>(a: usize) -> T {
    i32::try_from(a)
        .expect("value must fit in an i32 to be converted")
        .into()
}
|
||||
|
||||
// The current float encoder produces the following compression:
|
||||
//
|
||||
// values block size compression
|
||||
// 10 33 26.4 bits/value
|
||||
// 25 52 16.64 bits/value
|
||||
// 50 78 12.48 bits/value
|
||||
// 100 129 10.32 bits/value
|
||||
// 250 290 9.28 bits/value
|
||||
// 500 584 9.34 bits/value
|
||||
// 750 878 9.36 bits/value
|
||||
// 1000 1221 9.76 bits/value
|
||||
// 5000 7013 11.22 bits/value
|
||||
// 10000 15145 12.11 bits/value
|
||||
// 50000 90090 14.41 bits/value
|
||||
// 100000 192481 15.39 bits/value
|
||||
//
|
||||
fn float_encode_sequential(c: &mut Criterion) {
|
||||
benchmark_encode_sequential(
|
||||
c,
|
||||
"float_encode_sequential",
|
||||
&LARGER_BATCH_SIZES,
|
||||
delorean::encoders::float::encode,
|
||||
);
|
||||
}
|
||||
|
||||
// The current integer encoder produces the following compression. Note, since
|
||||
// a sequential range of values can be encoded using RLE the compression
|
||||
// statistics are not very interesting.
|
||||
//
|
||||
// values block size compression
|
||||
// 10 11 8.80 bits/value
|
||||
// 25 11 3.52 bits/value
|
||||
// 50 11 1.76 bits/value
|
||||
// 100 11 0.88 bits/value
|
||||
// 250 12 0.38 bits/value
|
||||
// 500 12 0.19 bits/value
|
||||
// 750 12 0.12 bits/value
|
||||
// 1000 12 0.09 bits/value
|
||||
// 5000 12 0.01 bits/value
|
||||
// 10000 12 0.00 bits/value
|
||||
// 50000 13 0.00 bits/value
|
||||
// 100000 13 0.00 bits/value
|
||||
//
|
||||
fn integer_encode_sequential(c: &mut Criterion) {
|
||||
benchmark_encode_sequential(
|
||||
c,
|
||||
"integer_encode_sequential",
|
||||
&LARGER_BATCH_SIZES,
|
||||
delorean::encoders::integer::encode,
|
||||
);
|
||||
}
|
||||
|
||||
fn timestamp_encode_sequential(c: &mut Criterion) {
|
||||
benchmark_encode_sequential(
|
||||
c,
|
||||
"timestamp_encode_sequential",
|
||||
&LARGER_BATCH_SIZES,
|
||||
delorean::encoders::timestamp::encode,
|
||||
);
|
||||
}
|
||||
|
||||
// The current float encoder produces the following compression:
|
||||
//
|
||||
// values block size compression
|
||||
// 10 32 25.6 bits/value
|
||||
// 25 76 24.32 bits/value
|
||||
// 50 86 13.76 bits/value
|
||||
// 100 167 13.36 bits/value
|
||||
// 250 388 12.41 bits/value
|
||||
// 500 1165 18.64 bits/value
|
||||
// 750 1769 18.86 bits/value
|
||||
// 1000 2366 18.92 bits/value
|
||||
// 5000 11785 18.85 bits/value
|
||||
// 10000 23559 18.84 bits/value
|
||||
// 50000 117572 18.81 bits/value
|
||||
// 100000 235166 18.81 bits/value
|
||||
//
|
||||
fn float_encode_random(c: &mut Criterion) {
|
||||
benchmark_encode(
|
||||
c,
|
||||
"float_encode_random",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let range = Uniform::from(0.0..100.0);
|
||||
rand::thread_rng()
|
||||
.sample_iter(&range)
|
||||
.take(batch_size)
|
||||
.collect()
|
||||
},
|
||||
delorean::encoders::float::encode,
|
||||
)
|
||||
}
|
||||
|
||||
// The current integer encoder produces the following compression:
|
||||
//
|
||||
// values block size compression
|
||||
// 10 25 20.00 bits/value
|
||||
// 25 33 10.56 bits/value
|
||||
// 50 65 10.40 bits/value
|
||||
// 100 121 9.68 bits/value
|
||||
// 250 281 8.99 bits/value
|
||||
// 500 561 8.97 bits/value
|
||||
// 750 833 8.88 bits/value
|
||||
// 1000 1105 8.84 bits/value
|
||||
// 5000 5425 8.68 bits/value
|
||||
// 10000 10865 8.69 bits/value
|
||||
// 50000 54361 8.69 bits/value
|
||||
// 100000 108569 8.68 bits/value
|
||||
//
|
||||
fn integer_encode_random(c: &mut Criterion) {
|
||||
benchmark_encode(
|
||||
c,
|
||||
"integer_encode_random",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
(1..batch_size)
|
||||
.map(|_| rand::thread_rng().gen_range(0, 100))
|
||||
.collect()
|
||||
},
|
||||
delorean::encoders::integer::encode,
|
||||
)
|
||||
}
|
||||
|
||||
// The current float encoder produces the following compression:
|
||||
//
|
||||
// values block size compression
|
||||
// 10 91 72.8 bits/value
|
||||
// 25 208 66.56 bits/value
|
||||
// 50 411 65.76 bits/value
|
||||
// 100 809 64.72 bits/value
|
||||
// 250 2028 64.89 bits/value
|
||||
// 500 4059 64.94 bits/value
|
||||
// 750 6091 64.97 bits/value
|
||||
// 1000 8122 64.97 bits/value
|
||||
// 5000 40614 64.98 bits/value
|
||||
// 10000 81223 64.97 bits/value
|
||||
// 45000 365470 64.97 bits/value
|
||||
//
|
||||
fn float_encode_cpu(c: &mut Criterion) {
|
||||
benchmark_encode(
|
||||
c,
|
||||
"float_encode_cpu",
|
||||
&SMALLER_BATCH_SIZES,
|
||||
|batch_size| fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec(),
|
||||
delorean::encoders::float::encode,
|
||||
)
|
||||
}
|
||||
|
||||
fn float_decode_cpu(c: &mut Criterion) {
|
||||
benchmark_decode(
|
||||
c,
|
||||
"float_decode_cpu",
|
||||
&SMALLER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let decoded: Vec<f64> = fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec();
|
||||
let mut encoded = vec![];
|
||||
delorean::encoders::float::encode(&decoded, &mut encoded).unwrap();
|
||||
(decoded.len(), encoded)
|
||||
},
|
||||
delorean::encoders::float::decode,
|
||||
)
|
||||
}
|
||||
|
||||
fn float_decode_sequential(c: &mut Criterion) {
|
||||
benchmark_decode(
|
||||
c,
|
||||
"float_decode_sequential",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let decoded: Vec<f64> = (1..batch_size).map(convert_from_usize).collect();
|
||||
let mut encoded = vec![];
|
||||
delorean::encoders::float::encode(&decoded, &mut encoded).unwrap();
|
||||
(decoded.len(), encoded)
|
||||
},
|
||||
delorean::encoders::float::decode,
|
||||
)
|
||||
}
|
||||
|
||||
fn integer_decode_sequential(c: &mut Criterion) {
|
||||
benchmark_decode(
|
||||
c,
|
||||
"integer_decode_sequential",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let decoded: Vec<i64> = (1..batch_size).map(convert_from_usize).collect();
|
||||
let mut encoded = vec![];
|
||||
delorean::encoders::integer::encode(&decoded, &mut encoded).unwrap();
|
||||
(decoded.len(), encoded)
|
||||
},
|
||||
delorean::encoders::integer::decode,
|
||||
)
|
||||
}
|
||||
|
||||
fn timestamp_decode_sequential(c: &mut Criterion) {
|
||||
benchmark_decode(
|
||||
c,
|
||||
"timestamp_decode_sequential",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let decoded: Vec<i64> = (1..batch_size).map(convert_from_usize).collect();
|
||||
let mut encoded = vec![];
|
||||
delorean::encoders::timestamp::encode(&decoded, &mut encoded).unwrap();
|
||||
(decoded.len(), encoded)
|
||||
},
|
||||
delorean::encoders::timestamp::decode,
|
||||
)
|
||||
}
|
||||
|
||||
fn float_decode_random(c: &mut Criterion) {
|
||||
benchmark_decode(
|
||||
c,
|
||||
"float_decode_random",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let range = Uniform::from(0.0..100.0);
|
||||
let decoded: Vec<_> = rand::thread_rng()
|
||||
.sample_iter(&range)
|
||||
.take(batch_size)
|
||||
.collect();
|
||||
|
||||
let mut encoded = vec![];
|
||||
delorean::encoders::float::encode(&decoded, &mut encoded).unwrap();
|
||||
(decoded.len(), encoded)
|
||||
},
|
||||
delorean::encoders::float::decode,
|
||||
)
|
||||
}
|
||||
|
||||
fn integer_decode_random(c: &mut Criterion) {
|
||||
benchmark_decode(
|
||||
c,
|
||||
"integer_decode_random",
|
||||
&LARGER_BATCH_SIZES,
|
||||
|batch_size| {
|
||||
let decoded: Vec<i64> = (1..batch_size)
|
||||
.map(|_| rand::thread_rng().gen_range(0, 100))
|
||||
.collect();
|
||||
let mut encoded = vec![];
|
||||
delorean::encoders::integer::encode(&decoded, &mut encoded).unwrap();
|
||||
(decoded.len(), encoded)
|
||||
},
|
||||
delorean::encoders::integer::decode,
|
||||
)
|
||||
}
|
||||
|
||||
// Collects every benchmark function defined in this file into a single
// Criterion group named `benches`.
criterion_group!(
|
||||
benches,
|
||||
float_encode_sequential,
|
||||
integer_encode_sequential,
|
||||
timestamp_encode_sequential,
|
||||
float_encode_random,
|
||||
integer_encode_random,
|
||||
float_encode_cpu,
|
||||
float_decode_cpu,
|
||||
float_decode_sequential,
|
||||
integer_decode_sequential,
|
||||
timestamp_decode_sequential,
|
||||
float_decode_random,
|
||||
integer_decode_random,
|
||||
);
|
||||
|
||||
// Entry point for the `benches` group. The `[[bench]]` target in Cargo.toml
// sets `harness = false`, so the default test harness is not used.
criterion_main!(benches);
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
mod float;
|
||||
mod integer;
|
||||
pub mod float;
|
||||
pub mod integer;
|
||||
mod simple8b;
|
||||
mod timestamp;
|
||||
pub mod timestamp;
|
||||
|
|
|
@ -12,7 +12,7 @@ fn is_sentinel_u64(v: u64) -> bool {
|
|||
v == SENTINEL
|
||||
}
|
||||
|
||||
/// encode_all encodes a vector of floats into dst.
|
||||
/// encode encodes a vector of floats into dst.
|
||||
///
|
||||
/// The encoding used is equivalent to the encoding of floats in the Gorilla
|
||||
/// paper. Each subsequent value is compared to the previous and the XOR of the
|
||||
|
@ -20,7 +20,7 @@ fn is_sentinel_u64(v: u64) -> bool {
|
|||
/// representations based on those are stored.
|
||||
#[allow(dead_code)]
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
pub fn encode_all(src: &mut Vec<f64>, dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn encode(src: &[f64], dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
dst.truncate(0); // reset buffer.
|
||||
if src.is_empty() {
|
||||
return Ok(());
|
||||
|
@ -313,11 +313,11 @@ const BIT_MASK: [u64; 64] = [
|
|||
0x7fff_ffff_ffff_ffff,
|
||||
];
|
||||
|
||||
/// decode_all decodes a slice of bytes into a vector of floats.
|
||||
/// decode decodes a slice of bytes into a vector of floats.
|
||||
#[allow(dead_code)]
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
#[allow(clippy::useless_let_if_seq)]
|
||||
pub fn decode_all(src: &[u8], dst: &mut Vec<f64>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn decode(src: &[u8], dst: &mut Vec<f64>) -> Result<(), Box<dyn Error>> {
|
||||
if src.len() < 9 {
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -494,12 +494,12 @@ mod tests {
|
|||
use crate::tests::approximately_equal;
|
||||
|
||||
#[test]
|
||||
fn encode_all_no_values() {
|
||||
let mut src: Vec<f64> = vec![];
|
||||
fn encode_no_values() {
|
||||
let src: Vec<f64> = vec![];
|
||||
let mut dst = vec![];
|
||||
|
||||
// check for error
|
||||
super::encode_all(&mut src, &mut dst).expect("failed to encode src");
|
||||
super::encode(&src, &mut dst).expect("failed to encode src");
|
||||
|
||||
// verify encoded no values.
|
||||
let exp: Vec<u8> = Vec::new();
|
||||
|
@ -507,8 +507,8 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_special_values() {
|
||||
let mut src: Vec<f64> = vec![
|
||||
fn encode_special_values() {
|
||||
let src: Vec<f64> = vec![
|
||||
100.0,
|
||||
222.12,
|
||||
f64::from_bits(0x7ff8000000000001), // Go representation of signalling NaN
|
||||
|
@ -526,10 +526,10 @@ mod tests {
|
|||
let mut dst = vec![];
|
||||
|
||||
// check for error
|
||||
super::encode_all(&mut src, &mut dst).expect("failed to encode src");
|
||||
super::encode(&src, &mut dst).expect("failed to encode src");
|
||||
|
||||
let mut got = vec![];
|
||||
super::decode_all(&dst, &mut got).expect("failed to decode");
|
||||
super::decode(&dst, &mut got).expect("failed to decode");
|
||||
|
||||
// Verify decoded values.
|
||||
assert_eq!(got.len(), src.len());
|
||||
|
@ -544,7 +544,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all() {
|
||||
fn encode() {
|
||||
struct Test {
|
||||
name: String,
|
||||
input: Vec<f64>,
|
||||
|
@ -1647,14 +1647,14 @@ mod tests {
|
|||
];
|
||||
for test in tests {
|
||||
let mut dst = vec![];
|
||||
let mut src = test.input.clone();
|
||||
let exp = test.input;
|
||||
super::encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
let src = test.input;
|
||||
|
||||
super::encode(&src, &mut dst).expect("failed to encode");
|
||||
|
||||
let mut got = vec![];
|
||||
super::decode_all(&dst, &mut got).expect("failed to decode");
|
||||
super::decode(&dst, &mut got).expect("failed to decode");
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp, "{}", test.name);
|
||||
assert_eq!(got, src, "{}", test.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,21 +9,21 @@ enum Encoding {
|
|||
Rle = 2,
|
||||
}
|
||||
|
||||
/// encode_all encodes a vector of signed integers into dst.
|
||||
/// encode encodes a vector of signed integers into dst.
|
||||
///
|
||||
/// Deltas between the integers in the vector are first calculated, and these
|
||||
/// deltas are then zig-zag encoded. The resulting zig-zag encoded deltas are
|
||||
/// further compressed if possible, either via bit-packing using simple8b or by
|
||||
/// run-length encoding the deltas if they're all the same.
|
||||
#[allow(dead_code)]
|
||||
pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn encode<'a>(src: &[i64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
dst.truncate(0); // reset buffer.
|
||||
if src.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut max: u64 = 0;
|
||||
let mut deltas = i64_to_u64_vector(src);
|
||||
let mut deltas = i64_to_u64_vector(&src);
|
||||
for i in (1..deltas.len()).rev() {
|
||||
deltas[i] = zig_zag_encode(deltas[i].wrapping_sub(deltas[i - 1]) as i64);
|
||||
if deltas[i] > max {
|
||||
|
@ -67,7 +67,7 @@ pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Bo
|
|||
// first 4 high bits used for encoding type
|
||||
dst.push((Encoding::Simple8b as u8) << 4);
|
||||
dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value
|
||||
simple8b::encode_all(&deltas[1..], dst)
|
||||
simple8b::encode(&deltas[1..], dst)
|
||||
}
|
||||
|
||||
// zig_zag_encode converts a signed integer into an unsigned one by zig zagging
|
||||
|
@ -114,9 +114,9 @@ fn encode_rle(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
|
|||
dst.truncate(n);
|
||||
}
|
||||
|
||||
/// decode_all decodes a slice of bytes into a vector of signed integers.
|
||||
/// decode decodes a slice of bytes into a vector of signed integers.
|
||||
#[allow(dead_code)]
|
||||
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
|
||||
if src.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -198,7 +198,7 @@ fn decode_simple8b(src: &[u8], dst: &mut Vec<i64>) -> Result<(), Box<dyn Error>>
|
|||
buf.copy_from_slice(&src[0..8]);
|
||||
dst.push(zig_zag_decode(u64::from_be_bytes(buf)));
|
||||
|
||||
simple8b::decode_all(&src[8..], &mut res);
|
||||
simple8b::decode(&src[8..], &mut res);
|
||||
// TODO(edd): fix this. It's copying, which is slowwwwwwwww.
|
||||
let mut next = dst[0];
|
||||
for v in &res {
|
||||
|
@ -227,36 +227,36 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_no_values() {
|
||||
let mut src: Vec<i64> = vec![];
|
||||
fn encode_no_values() {
|
||||
let src: Vec<i64> = vec![];
|
||||
let mut dst = vec![];
|
||||
|
||||
// check for error
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode src");
|
||||
encode(&src, &mut dst).expect("failed to encode src");
|
||||
|
||||
// verify encoded no values.
|
||||
assert_eq!(dst.to_vec().len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_uncompressed() {
|
||||
let mut src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
|
||||
fn encode_uncompressed() {
|
||||
let src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
|
||||
let mut dst = vec![];
|
||||
|
||||
let exp = src.clone();
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
encode(&src, &mut dst).expect("failed to encode");
|
||||
|
||||
// verify uncompressed encoding used
|
||||
assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8);
|
||||
let mut got = vec![];
|
||||
decode_all(&dst, &mut got).expect("failed to decode");
|
||||
decode(&dst, &mut got).expect("failed to decode");
|
||||
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_rle() {
|
||||
fn encode_rle() {
|
||||
struct Test {
|
||||
name: String,
|
||||
input: Vec<i64>,
|
||||
|
@ -291,21 +291,21 @@ mod tests {
|
|||
|
||||
for test in tests {
|
||||
let mut dst = vec![];
|
||||
let mut src = test.input.clone();
|
||||
let src = test.input.clone();
|
||||
let exp = test.input;
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
encode(&src, &mut dst).expect("failed to encode");
|
||||
|
||||
// verify RLE encoding used
|
||||
assert_eq!(&dst[0] >> 4, Encoding::Rle as u8);
|
||||
let mut got = vec![];
|
||||
decode_all(&dst, &mut got).expect("failed to decode");
|
||||
decode(&dst, &mut got).expect("failed to decode");
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp, "{}", test.name);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_simple8b() {
|
||||
fn encode_simple8b() {
|
||||
struct Test {
|
||||
name: String,
|
||||
input: Vec<i64>,
|
||||
|
@ -328,14 +328,14 @@ mod tests {
|
|||
|
||||
for test in tests {
|
||||
let mut dst = vec![];
|
||||
let mut src = test.input.clone();
|
||||
let src = test.input.clone();
|
||||
let exp = test.input;
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
encode(&src, &mut dst).expect("failed to encode");
|
||||
// verify Simple8b encoding used
|
||||
assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8);
|
||||
|
||||
let mut got = vec![];
|
||||
decode_all(&dst, &mut got).expect("failed to decode");
|
||||
decode(&dst, &mut got).expect("failed to decode");
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp, "{}", test.name);
|
||||
}
|
||||
|
|
|
@ -25,9 +25,9 @@ const NUM_BITS: [[u8; 2]; 14] = [
|
|||
[1, 60],
|
||||
];
|
||||
|
||||
/// encode_all packs and binary encodes the provides slice of u64 values using
|
||||
/// encode packs and binary encodes the provided slice of u64 values using
|
||||
/// simple8b into the provided vector.
|
||||
pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn encode<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
let mut i = 0;
|
||||
'next_value: while i < src.len() {
|
||||
// try to pack a run of 240 or 120 1s
|
||||
|
@ -79,9 +79,9 @@ pub fn encode_all<'a>(src: &[u64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn E
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// decode_all decodes and unpacks the binary-encoded values stored in src into
|
||||
/// decode decodes and unpacks the binary-encoded values stored in src into
|
||||
/// dst.
|
||||
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
|
||||
pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
|
||||
let mut i = 0;
|
||||
let mut j = 0;
|
||||
let mut buf: [u8; 8] = [0; 8];
|
||||
|
@ -90,14 +90,13 @@ pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<u64>) {
|
|||
dst.resize(j + 240, 0); // may need 240 capacity
|
||||
}
|
||||
buf.copy_from_slice(&src[i..i + 8]);
|
||||
j += decode(u64::from_be_bytes(buf), &mut dst[j..]);
|
||||
j += decode_value(u64::from_be_bytes(buf), &mut dst[j..]);
|
||||
i += 8;
|
||||
}
|
||||
dst.truncate(j);
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn decode(v: u64, dst: &mut [u64]) -> usize {
|
||||
fn decode_value(v: u64, dst: &mut [u64]) -> usize {
|
||||
let sel = v >> S8B_BIT_SIZE as u64;
|
||||
let mut v = v;
|
||||
match sel {
|
||||
|
@ -225,7 +224,7 @@ mod tests {
|
|||
let mut dst = vec![];
|
||||
|
||||
// check for error
|
||||
encode_all(&src, &mut dst).expect("failed to encode src");
|
||||
encode(&src, &mut dst).expect("failed to encode src");
|
||||
|
||||
// verify encoded no values.
|
||||
assert_eq!(dst.len(), src.len())
|
||||
|
@ -237,9 +236,9 @@ mod tests {
|
|||
|
||||
let mut encoded = vec![];
|
||||
let mut decoded = vec![];
|
||||
encode_all(&src, &mut encoded).expect("failed to encode");
|
||||
encode(&src, &mut encoded).expect("failed to encode");
|
||||
assert_eq!(encoded.len(), 16); // verify vector is truncated.
|
||||
decode_all(&encoded, &mut decoded);
|
||||
decode(&encoded, &mut decoded);
|
||||
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
|
||||
}
|
||||
|
||||
|
@ -249,9 +248,9 @@ mod tests {
|
|||
|
||||
let mut encoded = vec![];
|
||||
let mut decoded = vec![];
|
||||
encode_all(&src, &mut encoded).expect("failed to encode");
|
||||
encode(&src, &mut encoded).expect("failed to encode");
|
||||
assert_eq!(encoded.len(), 24); // verify vector is truncated.
|
||||
decode_all(&encoded, &mut decoded);
|
||||
decode(&encoded, &mut decoded);
|
||||
assert_eq!(decoded.to_vec(), src, "{}", "mixed sizes");
|
||||
}
|
||||
|
||||
|
@ -260,12 +259,12 @@ mod tests {
|
|||
let src = vec![7, 6, 2 << (61 - 1), 4, 3, 2, 1];
|
||||
|
||||
let mut encoded = vec![];
|
||||
let result = encode_all(&src, &mut encoded);
|
||||
let result = encode(&src, &mut encoded);
|
||||
assert_eq!(result.unwrap_err().to_string(), "value out of bounds");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_all() {
|
||||
fn test_encode() {
|
||||
struct Test {
|
||||
name: String,
|
||||
// TODO(edd): no idea how to store the closure in the struct rather than the
|
||||
|
@ -338,9 +337,9 @@ mod tests {
|
|||
|
||||
for test in tests {
|
||||
let mut encoded = vec![];
|
||||
encode_all(&test.input, &mut encoded).expect("failed to encode");
|
||||
encode(&test.input, &mut encoded).expect("failed to encode");
|
||||
let mut decoded = vec![];
|
||||
decode_all(&encoded, &mut decoded);
|
||||
decode(&encoded, &mut decoded);
|
||||
assert_eq!(decoded.to_vec(), test.input, "{}", test.name);
|
||||
}
|
||||
|
||||
|
@ -350,27 +349,27 @@ mod tests {
|
|||
let mut input = ones(240)();
|
||||
input[120] = 5;
|
||||
let mut encoded = vec![];
|
||||
encode_all(&input, &mut encoded).expect("failed to encode");
|
||||
encode(&input, &mut encoded).expect("failed to encode");
|
||||
let mut decoded = vec![];
|
||||
decode_all(&encoded, &mut decoded);
|
||||
decode(&encoded, &mut decoded);
|
||||
assert_eq!(decoded.to_vec(), input, "{}", "120 ones");
|
||||
|
||||
input = ones(240)();
|
||||
input[119] = 5;
|
||||
|
||||
let mut encoded = vec![];
|
||||
encode_all(&input, &mut encoded).expect("failed to encode");
|
||||
encode(&input, &mut encoded).expect("failed to encode");
|
||||
let mut decoded = vec![];
|
||||
decode_all(&encoded, &mut decoded);
|
||||
decode(&encoded, &mut decoded);
|
||||
assert_eq!(decoded.to_vec(), input, "{}", "119 ones");
|
||||
|
||||
input = ones(241)();
|
||||
input[239] = 5;
|
||||
|
||||
let mut encoded = vec![];
|
||||
encode_all(&input, &mut encoded).expect("failed to encode");
|
||||
encode(&input, &mut encoded).expect("failed to encode");
|
||||
let mut decoded = vec![];
|
||||
decode_all(&encoded, &mut decoded);
|
||||
decode(&encoded, &mut decoded);
|
||||
assert_eq!(decoded.to_vec(), input, "{}", "239 ones");
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ enum Encoding {
|
|||
Rle = 2,
|
||||
}
|
||||
|
||||
/// encode_all encodes a vector of signed integers into a slice of bytes.
|
||||
/// encode encodes a vector of signed integers into a slice of bytes.
|
||||
///
|
||||
/// To maximise compression, the provided vector should be sorted in ascending
|
||||
/// order. First deltas between the integers are determined, then further encoding
|
||||
|
@ -17,7 +17,7 @@ enum Encoding {
|
|||
/// encoded using RLE. If not, as long as the deltas are not bigger than simple8b::MAX_VALUE
|
||||
/// they can be encoded using simple8b.
|
||||
#[allow(dead_code)]
|
||||
pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn encode<'a>(src: &[i64], dst: &'a mut Vec<u8>) -> Result<(), Box<dyn Error>> {
|
||||
dst.truncate(0); // reset buffer.
|
||||
if src.is_empty() {
|
||||
return Ok(());
|
||||
|
@ -86,7 +86,7 @@ pub fn encode_all<'a>(src: &mut Vec<i64>, dst: &'a mut Vec<u8>) -> Result<(), Bo
|
|||
dst.push((Encoding::Simple8b as u8) << 4);
|
||||
dst[0] |= ((div as f64).log10()) as u8; // 4 low bits used for log10 divisor
|
||||
dst.extend_from_slice(&deltas[0].to_be_bytes()); // encode first value
|
||||
simple8b::encode_all(&deltas[1..], dst)
|
||||
simple8b::encode(&deltas[1..], dst)
|
||||
}
|
||||
|
||||
// i64_to_u64_vector converts a Vec<i64> to Vec<u64>.
|
||||
|
@ -142,10 +142,10 @@ fn encode_rle(v: u64, delta: u64, count: u64, dst: &mut Vec<u8>) {
|
|||
dst.truncate(n);
|
||||
}
|
||||
|
||||
/// decode_all decodes a slice of bytes encoded using encode_all back into a
|
||||
/// decode decodes a slice of bytes encoded using encode back into a
|
||||
/// vector of signed integers.
|
||||
#[allow(dead_code)]
|
||||
pub fn decode_all<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
|
||||
pub fn decode<'a>(src: &[u8], dst: &'a mut Vec<i64>) -> Result<(), Box<dyn Error>> {
|
||||
if src.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -234,7 +234,7 @@ fn decode_simple8b(src: &[u8], dst: &mut Vec<i64>) -> Result<(), Box<dyn Error>>
|
|||
buf.copy_from_slice(&src[1..9]);
|
||||
dst.push(i64::from_be_bytes(buf));
|
||||
|
||||
simple8b::decode_all(&src[9..], &mut res);
|
||||
simple8b::decode(&src[9..], &mut res);
|
||||
let mut next = dst[dst.len() - 1];
|
||||
if scaler > 1 {
|
||||
// TODO(edd): fix this. It's copying, which is slowwwwwwwww.
|
||||
|
@ -259,37 +259,37 @@ mod tests {
|
|||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn encode_all_no_values() {
|
||||
let mut src: Vec<i64> = vec![];
|
||||
fn encode_no_values() {
|
||||
let src: Vec<i64> = vec![];
|
||||
let mut dst = vec![];
|
||||
|
||||
// check for error
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode src");
|
||||
encode(&src, &mut dst).expect("failed to encode src");
|
||||
|
||||
// verify encoded no values.
|
||||
assert_eq!(dst.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_uncompressed() {
|
||||
let mut src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
|
||||
fn encode_uncompressed() {
|
||||
let src: Vec<i64> = vec![-1000, 0, simple8b::MAX_VALUE as i64, 213123421];
|
||||
let mut dst = vec![];
|
||||
|
||||
let exp = src.clone();
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
encode(&src, &mut dst).expect("failed to encode");
|
||||
|
||||
// verify uncompressed encoding used
|
||||
assert_eq!(&dst[0] >> 4, Encoding::Uncompressed as u8);
|
||||
|
||||
let mut got = vec![];
|
||||
decode_all(&dst, &mut got).expect("failed to decode");
|
||||
decode(&dst, &mut got).expect("failed to decode");
|
||||
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_rle() {
|
||||
fn encode_rle() {
|
||||
struct Test {
|
||||
name: String,
|
||||
input: Vec<i64>,
|
||||
|
@ -332,22 +332,22 @@ mod tests {
|
|||
|
||||
for test in tests {
|
||||
let mut dst = vec![];
|
||||
let mut src = test.input.clone();
|
||||
let src = test.input.clone();
|
||||
let exp = test.input;
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
encode(&src, &mut dst).expect("failed to encode");
|
||||
|
||||
// verify RLE encoding used
|
||||
assert_eq!(&dst[0] >> 4, Encoding::Rle as u8);
|
||||
|
||||
let mut got = vec![];
|
||||
decode_all(&dst, &mut got).expect("failed to decode");
|
||||
decode(&dst, &mut got).expect("failed to decode");
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp, "{}", test.name);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_all_simple8b() {
|
||||
fn encode_simple8b() {
|
||||
struct Test {
|
||||
name: String,
|
||||
input: Vec<i64>,
|
||||
|
@ -370,14 +370,14 @@ mod tests {
|
|||
|
||||
for test in tests {
|
||||
let mut dst = vec![];
|
||||
let mut src = test.input.clone();
|
||||
let src = test.input.clone();
|
||||
let exp = test.input;
|
||||
encode_all(&mut src, &mut dst).expect("failed to encode");
|
||||
encode(&src, &mut dst).expect("failed to encode");
|
||||
// verify Simple8b encoding used
|
||||
assert_eq!(&dst[0] >> 4, Encoding::Simple8b as u8);
|
||||
|
||||
let mut got = vec![];
|
||||
decode_all(&dst, &mut got).expect("failed to decode");
|
||||
decode(&dst, &mut got).expect("failed to decode");
|
||||
// verify got same values back
|
||||
assert_eq!(got, exp, "{}", test.name);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue