influxdb/cache_system/benches/addressable_heap.rs

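//! Benchmarks for [`AddressableHeap`].
//!
//! Each benchmark group below measures a single heap operation (insert, peek, get, pop,
//! remove, replace, update-order) against heaps pre-filled with `n` random entries.
//! They are typically run via `cargo bench` within the `cache_system` crate.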

use std::mem::size_of;

use cache_system::addressable_heap::AddressableHeap;
use criterion::{
    criterion_group, criterion_main, measurement::WallTime, AxisScale, BatchSize, BenchmarkGroup,
    BenchmarkId, Criterion, PlotConfiguration, SamplingMode,
};
use rand::{prelude::SliceRandom, thread_rng, Rng};

/// Payload (`V`) for testing.
///
/// This is a 64-bit-wide object, which is large enough to store a [`Box`] or a [`usize`].
#[derive(Debug, Clone, Default)]
struct Payload([u8; 8]);

const _: () = assert!(size_of::<Payload>() == 8);
const _: () = assert!(size_of::<Payload>() >= size_of::<Box<Vec<u32>>>());
const _: () = assert!(size_of::<Payload>() >= size_of::<usize>());
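/// The heap type under test: `u64` keys, [`Payload`] values, and `u64` orders.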
type TestHeap = AddressableHeap<u64, Payload, u64>;
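/// Numbers of pre-existing entries (`n`) that each benchmark group is parameterized over.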
const TEST_SIZES: &[usize] = &[0, 1, 10, 100, 1_000, 10_000];
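/// A random key/order pair used to drive the benchmarks; the payload itself is always
/// [`Payload::default()`].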
#[derive(Debug, Clone)]
struct Entry {
    k: u64,
    o: u64,
}

impl Entry {
    fn new_random<R>(rng: &mut R) -> Self
    where
        R: Rng,
    {
        Self {
            k: rng.gen(),
            o: rng.gen(),
        }
    }

    fn new_random_n<R>(rng: &mut R, n: usize) -> Vec<Self>
    where
        R: Rng,
    {
        (0..n).map(|_| Self::new_random(rng)).collect()
    }
}

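/// Builds a heap pre-filled with `n` random entries and returns both the heap and the
/// inserted entries (so callers can pick a key that is known to exist).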
fn create_filled_heap<R>(rng: &mut R, n: usize) -> (TestHeap, Vec<Entry>)
where
    R: Rng,
{
    let mut heap = TestHeap::default();
    let mut entries = Vec::with_capacity(n);
    for _ in 0..n {
        let entry = Entry::new_random(rng);
        heap.insert(entry.k, Payload::default(), entry.o);
        entries.push(entry);
    }
    (heap, entries)
}

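/// Common criterion group settings: logarithmic summary scale and flat sampling.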
fn setup_group(g: &mut BenchmarkGroup<'_, WallTime>) {
    g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
    g.sampling_mode(SamplingMode::Flat);
}

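/// Benchmarks inserting `n` fresh entries into an empty heap.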
fn bench_insert_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("insert_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || (TestHeap::default(), Entry::new_random_n(&mut rng, *n)),
                |(mut heap, entries)| {
                    for entry in &entries {
                        heap.insert(entry.k, Payload::default(), entry.o);
                    }
                    // let criterion handle the drop
                    (heap, entries)
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

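/// Benchmarks peeking at the top entry of a heap that already contains `n` entries.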
fn bench_peek_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("peek_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || create_filled_heap(&mut rng, *n).0,
                |heap| {
                    heap.peek();
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

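/// Benchmarks looking up a key that is known to exist in a heap of `n` entries
/// (the `n == 0` case is skipped because there is nothing to look up).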
fn bench_get_existing_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("get_existing_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        if *n == 0 {
            continue;
        }
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, entries) = create_filled_heap(&mut rng, *n);
                    let entry = entries.choose(&mut rng).unwrap().clone();
                    (heap, entry)
                },
                |(heap, entry)| {
                    heap.get(&entry.k);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

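/// Benchmarks looking up a random key that is (almost certainly) absent from a heap of `n` entries.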
fn bench_get_new_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("get_new_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, _entries) = create_filled_heap(&mut rng, *n);
                    let entry = Entry::new_random(&mut rng);
                    (heap, entry)
                },
                |(heap, entry)| {
                    heap.get(&entry.k);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

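/// Benchmarks draining a heap of `n` entries by popping until it is empty.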
fn bench_pop_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("pop_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || create_filled_heap(&mut rng, *n).0,
                |mut heap| {
                    for _ in 0..*n {
                        heap.pop();
                    }
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

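/// Benchmarks removing a key that is known to exist in a heap of `n` entries
/// (the `n == 0` case is skipped).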
fn bench_remove_existing_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("remove_existing_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        if *n == 0 {
            continue;
        }
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, entries) = create_filled_heap(&mut rng, *n);
                    let entry = entries.choose(&mut rng).unwrap().clone();
                    (heap, entry)
                },
                |(mut heap, entry)| {
                    heap.remove(&entry.k);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

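/// Benchmarks removing a random key that is (almost certainly) absent from a heap of `n` entries.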
fn bench_remove_new_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("remove_new_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, _entries) = create_filled_heap(&mut rng, *n);
                    let entry = Entry::new_random(&mut rng);
                    (heap, entry)
                },
                |(mut heap, entry)| {
                    heap.remove(&entry.k);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

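/// Benchmarks re-inserting an existing key with a new order, i.e. replacing its entry,
/// in a heap of `n` entries (the `n == 0` case is skipped).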
fn bench_replace_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("replace_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        if *n == 0 {
            continue;
        }
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, entries) = create_filled_heap(&mut rng, *n);
                    let entry = entries.choose(&mut rng).unwrap().clone();
                    let entry = Entry {
                        k: entry.k,
                        o: Entry::new_random(&mut rng).o,
                    };
                    (heap, entry)
                },
                |(mut heap, entry)| {
                    heap.insert(entry.k, Payload::default(), entry.o);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

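/// Benchmarks [`AddressableHeap::update_order`] for a key that exists in a heap of `n`
/// entries (the `n == 0` case is skipped).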
fn bench_update_order_existing_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("update_order_existing_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        if *n == 0 {
            continue;
        }
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, entries) = create_filled_heap(&mut rng, *n);
                    let entry = entries.choose(&mut rng).unwrap().clone();
                    let entry = Entry {
                        k: entry.k,
                        o: Entry::new_random(&mut rng).o,
                    };
                    (heap, entry)
                },
                |(mut heap, entry)| {
                    heap.update_order(&entry.k, entry.o);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

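/// Benchmarks [`AddressableHeap::update_order`] for a random key that is (almost certainly)
/// absent from a heap of `n` entries.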
fn bench_update_order_new_after_n_elements(c: &mut Criterion) {
    let mut g = c.benchmark_group("update_order_new_after_n_elements");
    setup_group(&mut g);
    let mut rng = thread_rng();
    for n in TEST_SIZES {
        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &_n| {
            b.iter_batched(
                || {
                    let (heap, _entries) = create_filled_heap(&mut rng, *n);
                    let entry = Entry::new_random(&mut rng);
                    (heap, entry)
                },
                |(mut heap, entry)| {
                    heap.update_order(&entry.k, entry.o);
                    // let criterion handle the drop
                    heap
                },
                BatchSize::LargeInput,
            );
        });
    }
    g.finish();
}

criterion_group! {
    name = benches;
    config = Criterion::default();
    targets =
        bench_insert_n_elements,
        bench_peek_after_n_elements,
        bench_get_existing_after_n_elements,
        bench_get_new_after_n_elements,
        bench_pop_n_elements,
        bench_remove_existing_after_n_elements,
        bench_remove_new_after_n_elements,
        bench_replace_after_n_elements,
        bench_update_order_existing_after_n_elements,
        bench_update_order_new_after_n_elements,
}

criterion_main!(benches);