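//! Criterion benchmarks for the `influxdb_tsm` float, integer and timestamp
//! encoders and decoders, exercised against sequential, random and
//! CPU-fixture data at a range of batch sizes.
//!
//! Typically run with `cargo bench`; Criterion reports throughput in bytes
//! per second based on the sizes configured in each benchmark group.
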
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rand::{distributions::Uniform, Rng};

use std::convert::TryFrom;
use std::mem;

mod fixtures;

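// Batch sizes used to parameterise the benchmark groups. The smaller set is
// capped at 45_000, presumably so the slices taken from the CPU fixture data
// below stay in bounds.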
const LARGER_BATCH_SIZES: [usize; 12] = [
    10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 50_000, 100_000,
];

const SMALLER_BATCH_SIZES: [usize; 11] =
    [10, 25, 50, 100, 250, 500, 750, 1_000, 5_000, 10_000, 45_000];

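// Common signature of the `influxdb_tsm` encode and decode functions: each
// reads from a source slice, writes into a destination vector and returns a
// boxed error on failure.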
type EncodeFn<T> = fn(src: &[T], dst: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>>;
type DecodeFn<T> = fn(src: &[u8], dst: &mut Vec<T>) -> Result<(), Box<dyn std::error::Error>>;

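/// Benchmarks an encoder against a monotonically increasing sequence
/// (1, 2, 3, ...) for each of the supplied batch sizes.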
fn benchmark_encode_sequential<T: From<i32>>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    encode: EncodeFn<T>,
) {
    benchmark_encode(
        c,
        benchmark_group_name,
        batch_sizes,
        |batch_size| (1..batch_size).map(convert_from_usize).collect(),
        encode,
    );
}

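/// Drives an encode benchmark group: for every batch size the decoded input
/// is generated once, then repeated calls to `encode` into a reused buffer
/// are measured. Throughput is reported as the in-memory size of the decoded
/// batch (`batch_size * size_of::<T>()` bytes).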
fn benchmark_encode<T>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    decoded_value_generation: fn(batch_size: usize) -> Vec<T>,
    encode: EncodeFn<T>,
) {
    let mut group = c.benchmark_group(benchmark_group_name);
    for &batch_size in batch_sizes {
        group.throughput(Throughput::Bytes(
            u64::try_from(batch_size * mem::size_of::<T>()).unwrap(),
        ));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            &batch_size,
            |b, &batch_size| {
                let decoded = decoded_value_generation(batch_size);
                let mut encoded = vec![];
                b.iter(|| {
                    encode(&decoded, &mut encoded).unwrap();
                });
            },
        );
    }
    group.finish();
}

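/// Drives a decode benchmark group: for every batch size the generator
/// produces an already-encoded block (and its decoded length) once, then
/// repeated decodes into a pre-allocated, cleared vector are measured.
/// Throughput is reported as the size of the encoded block in bytes.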
fn benchmark_decode<T>(
    c: &mut Criterion,
    benchmark_group_name: &str,
    batch_sizes: &[usize],
    input_value_generation: fn(batch_size: usize) -> (usize, Vec<u8>),
    decode: DecodeFn<T>,
) {
    let mut group = c.benchmark_group(benchmark_group_name);
    for &batch_size in batch_sizes {
        let (decoded_len, encoded) = input_value_generation(batch_size);
        group.throughput(Throughput::Bytes(u64::try_from(encoded.len()).unwrap()));
        group.bench_with_input(
            BenchmarkId::from_parameter(batch_size),
            &decoded_len,
            |b, &decoded_len| {
                let mut decoded_mut = Vec::with_capacity(decoded_len);
                b.iter(|| {
                    decoded_mut.clear();
                    decode(&encoded, &mut decoded_mut).unwrap();
                });
            },
        );
    }
    group.finish();
}

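/// Converts a `usize` into any `T: From<i32>`, panicking if the value does
/// not fit in an `i32`.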
fn convert_from_usize<T: From<i32>>(a: usize) -> T {
    i32::try_from(a).unwrap().into()
}

// The current float encoder produces the following compression:
//
//  values   block size   compression
//  10       33           26.4 bits/value
//  25       52           16.64 bits/value
//  50       78           12.48 bits/value
//  100      129          10.32 bits/value
//  250      290          9.28 bits/value
//  500      584          9.34 bits/value
//  750      878          9.36 bits/value
//  1000     1221         9.76 bits/value
//  5000     7013         11.22 bits/value
//  10000    15145        12.11 bits/value
//  50000    90090        14.41 bits/value
//  100000   192481       15.39 bits/value
//
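// ("compression" in these tables is block size in bytes * 8 / number of
// values, e.g. 33 * 8 / 10 = 26.4 bits/value.)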
fn float_encode_sequential(c: &mut Criterion) {
    benchmark_encode_sequential(
        c,
        "float_encode_sequential",
        &LARGER_BATCH_SIZES,
        influxdb_tsm::encoders::float::encode,
    );
}

// The current integer encoder produces the following compression. Note, since
// a sequential range of values can be encoded using RLE the compression
// statistics are not very interesting.
//
//  values   block size   compression
//  10       11           8.80 bits/value
//  25       11           3.52 bits/value
//  50       11           1.76 bits/value
//  100      11           0.88 bits/value
//  250      12           0.38 bits/value
//  500      12           0.19 bits/value
//  750      12           0.12 bits/value
//  1000     12           0.09 bits/value
//  5000     12           0.01 bits/value
//  10000    12           0.00 bits/value
//  50000    13           0.00 bits/value
//  100000   13           0.00 bits/value
//
fn integer_encode_sequential(c: &mut Criterion) {
    benchmark_encode_sequential(
        c,
        "integer_encode_sequential",
        &LARGER_BATCH_SIZES,
        influxdb_tsm::encoders::integer::encode,
    );
}

fn timestamp_encode_sequential(c: &mut Criterion) {
    benchmark_encode_sequential(
        c,
        "timestamp_encode_sequential",
        &LARGER_BATCH_SIZES,
        influxdb_tsm::encoders::timestamp::encode,
    );
}

// The current float encoder produces the following compression:
//
//  values   block size   compression
//  10       32           25.6 bits/value
//  25       76           24.32 bits/value
//  50       86           13.76 bits/value
//  100      167          13.36 bits/value
//  250      388          12.41 bits/value
//  500      1165         18.64 bits/value
//  750      1769         18.86 bits/value
//  1000     2366         18.92 bits/value
//  5000     11785        18.85 bits/value
//  10000    23559        18.84 bits/value
//  50000    117572       18.81 bits/value
//  100000   235166       18.81 bits/value
//
fn float_encode_random(c: &mut Criterion) {
    benchmark_encode(
        c,
        "float_encode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let range = Uniform::from(0.0..100.0);
            rand::thread_rng()
                .sample_iter(&range)
                .take(batch_size)
                .collect()
        },
        influxdb_tsm::encoders::float::encode,
    )
}

// The current integer encoder produces the following compression:
//
//  values   block size   compression
//  10       25           20.00 bits/value
//  25       33           10.56 bits/value
//  50       65           10.40 bits/value
//  100      121          9.68 bits/value
//  250      281          8.99 bits/value
//  500      561          8.97 bits/value
//  750      833          8.88 bits/value
//  1000     1105         8.84 bits/value
//  5000     5425         8.68 bits/value
//  10000    10865        8.69 bits/value
//  50000    54361        8.69 bits/value
//  100000   108569       8.68 bits/value
//
fn integer_encode_random(c: &mut Criterion) {
    benchmark_encode(
        c,
        "integer_encode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            (1..batch_size)
                .map(|_| rand::thread_rng().gen_range(0..100))
                .collect()
        },
        influxdb_tsm::encoders::integer::encode,
    )
}

// The current float encoder produces the following compression:
//
//  values   block size   compression
//  10       91           72.8 bits/value
//  25       208          66.56 bits/value
//  50       411          65.76 bits/value
//  100      809          64.72 bits/value
//  250      2028         64.89 bits/value
//  500      4059         64.94 bits/value
//  750      6091         64.97 bits/value
//  1000     8122         64.97 bits/value
//  5000     40614        64.98 bits/value
//  10000    81223        64.97 bits/value
//  45000    365470       64.97 bits/value
//
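// This benchmark (and `float_decode_cpu` below) encodes the example CPU field
// values from the `fixtures` module rather than synthetic data, which likely
// gives the most realistic picture of the float codec on telemetry-like input.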
fn float_encode_cpu(c: &mut Criterion) {
    benchmark_encode(
        c,
        "float_encode_cpu",
        &SMALLER_BATCH_SIZES,
        |batch_size| fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec(),
        influxdb_tsm::encoders::float::encode,
    )
}

fn float_decode_cpu(c: &mut Criterion) {
    benchmark_decode(
        c,
        "float_decode_cpu",
        &SMALLER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<f64> = fixtures::CPU_F64_EXAMPLE_VALUES[..batch_size].to_vec();
            let mut encoded = vec![];
            influxdb_tsm::encoders::float::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        influxdb_tsm::encoders::float::decode,
    )
}

fn float_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "float_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<f64> = (1..batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            influxdb_tsm::encoders::float::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        influxdb_tsm::encoders::float::decode,
    )
}

fn integer_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "integer_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<i64> = (1..batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            influxdb_tsm::encoders::integer::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        influxdb_tsm::encoders::integer::decode,
    )
}

fn timestamp_decode_sequential(c: &mut Criterion) {
    benchmark_decode(
        c,
        "timestamp_decode_sequential",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<i64> = (1..batch_size).map(convert_from_usize).collect();
            let mut encoded = vec![];
            influxdb_tsm::encoders::timestamp::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        influxdb_tsm::encoders::timestamp::decode,
    )
}

fn float_decode_random(c: &mut Criterion) {
    benchmark_decode(
        c,
        "float_decode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let range = Uniform::from(0.0..100.0);
            let decoded: Vec<_> = rand::thread_rng()
                .sample_iter(&range)
                .take(batch_size)
                .collect();

            let mut encoded = vec![];
            influxdb_tsm::encoders::float::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        influxdb_tsm::encoders::float::decode,
    )
}

fn integer_decode_random(c: &mut Criterion) {
    benchmark_decode(
        c,
        "integer_decode_random",
        &LARGER_BATCH_SIZES,
        |batch_size| {
            let decoded: Vec<i64> = (1..batch_size)
                .map(|_| rand::thread_rng().gen_range(0..100))
                .collect();
            let mut encoded = vec![];
            influxdb_tsm::encoders::integer::encode(&decoded, &mut encoded).unwrap();
            (decoded.len(), encoded)
        },
        influxdb_tsm::encoders::integer::decode,
    )
}

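// Register every benchmark function above with Criterion and generate the
// benchmark harness entry point.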
criterion_group!(
    benches,
    float_encode_sequential,
    integer_encode_sequential,
    timestamp_encode_sequential,
    float_encode_random,
    integer_encode_random,
    float_encode_cpu,
    float_decode_cpu,
    float_decode_sequential,
    integer_decode_sequential,
    timestamp_decode_sequential,
    float_decode_random,
    integer_decode_random,
);

criterion_main!(benches);