From f5f80e879e5a1e3824279930ec5a5b9d087cb267 Mon Sep 17 00:00:00 2001
From: Marco Neumann <marco@crepererum.net>
Date: Mon, 25 Apr 2022 16:37:29 +0200
Subject: [PATCH] test: add benchmarks for addressable heap (#4201)

---
 Cargo.lock                              |   1 +
 querier/Cargo.toml                      |   5 +
 querier/benches/addressable_heap.rs     | 315 ++++++++++++++++++++++++
 querier/src/cache_system/backend/mod.rs |   2 +-
 querier/src/lib.rs                      |   3 +
 5 files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 querier/benches/addressable_heap.rs

diff --git a/Cargo.lock b/Cargo.lock
index e9d598fea4..b888f7f7ca 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4537,6 +4537,7 @@ dependencies = [
  "backoff 0.1.0",
  "bytes",
  "client_util",
+ "criterion",
  "data_types",
  "data_types2",
  "datafusion 0.1.0",
diff --git a/querier/Cargo.toml b/querier/Cargo.toml
index 9e556e76a6..1498191896 100644
--- a/querier/Cargo.toml
+++ b/querier/Cargo.toml
@@ -46,6 +46,11 @@ workspace-hack = { path = "../workspace-hack"}
 arrow_util = { path = "../arrow_util" }
 assert_matches = "1.5"
 bytes = "1.0"
+criterion = "0.3"
 iox_tests = { path = "../iox_tests" }
 mutable_batch_lp = { path = "../mutable_batch_lp" }
 proptest = { version = "1", default_features = false, features = ["std"] }
+
+[[bench]]
+name = "addressable_heap"
+harness = false
diff --git a/querier/benches/addressable_heap.rs b/querier/benches/addressable_heap.rs
new file mode 100644
index 0000000000..5656eb1aa3
--- /dev/null
+++ b/querier/benches/addressable_heap.rs
@@ -0,0 +1,315 @@
+use std::mem::size_of;
+
+use criterion::{
+    criterion_group, criterion_main, measurement::WallTime, AxisScale, BatchSize, BenchmarkGroup,
+    BenchmarkId, Criterion, PlotConfiguration, SamplingMode,
+};
+use querier::AddressableHeap;
+use rand::{prelude::SliceRandom, thread_rng, Rng};
+
+/// Payload (the heap's value type `V`) used by the benchmarks.
+///
+/// This is a 64-bit wide object, which is enough to store a thin [`Box`] or a [`usize`].
+#[derive(Debug, Clone, Default)]
+struct Payload([u8; 8]);
+
+const _: () = assert!(size_of::<Payload>() == 8); // compile-time check: exactly 8 bytes
+const _: () = assert!(size_of::<Payload>() >= size_of::<Box<Vec<u32>>>()); // fits a thin `Box`
+const _: () = assert!(size_of::<Payload>() >= size_of::<usize>()); // fits a `usize`
+
+type TestHeap = AddressableHeap<u64, Payload, u64>; // heap flavour used by every benchmark
+
+const TEST_SIZES: &[usize] = &[0, 1, 10, 100, 1_000, 10_000]; // element counts `n` to benchmark
+
+#[derive(Debug, Clone)]
+struct Entry {
+    k: u64, // key: first argument to `insert`, also used for `get`/`remove`
+    o: u64, // third argument to `insert` (presumably the ordering value -- TODO confirm)
+}
+
+impl Entry {
+    /// Builds one entry, drawing first `k` and then `o` from `rng`.
+    fn new_random<R: Rng>(rng: &mut R) -> Self {
+        let k = rng.gen();
+        let o = rng.gen();
+        Self { k, o }
+    }
+
+    /// Builds `n` entries by calling [`Entry::new_random`] `n` times on the same `rng`.
+    fn new_random_n<R: Rng>(rng: &mut R, n: usize) -> Vec<Self> {
+        let mut entries = Vec::with_capacity(n);
+        for _ in 0..n {
+            entries.push(Self::new_random(rng));
+        }
+
+        entries
+    }
+}
+
+/// Builds a heap by inserting `n` random entries.
+///
+/// Returns the heap and the generated entries in insertion order, so callers can look up
+/// keys that were actually inserted.
+fn create_filled_heap<R: Rng>(rng: &mut R, n: usize) -> (TestHeap, Vec<Entry>) {
+    // all random draws happen up front; inserting does not touch `rng`
+    let generated = Entry::new_random_n(rng, n);
+
+    let mut filled = TestHeap::default();
+    for e in &generated {
+        filled.insert(e.k, Payload::default(), e.o);
+    }
+
+    (filled, generated)
+}
+
+fn setup_group(g: &mut BenchmarkGroup<'_, WallTime>) {
+    let log_summary = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic);
+    g.plot_config(log_summary).sampling_mode(SamplingMode::Flat); // shared by every group
+}
+
+/// Measures inserting `n` random entries into an initially empty heap.
+fn bench_insert_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("insert_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || (TestHeap::default(), Entry::new_random_n(&mut rng, n)),
+                |(mut heap, entries)| {
+                    for entry in &entries {
+                        heap.insert(entry.k, Payload::default(), entry.o);
+                    }
+
+                    // return everything so criterion drops it outside the timed section
+                    (heap, entries)
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures a single `peek` on a heap that was filled with `n` random entries.
+fn bench_peek_after_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("peek_after_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || create_filled_heap(&mut rng, n).0,
+                |heap| {
+                    criterion::black_box(heap.peek()); // prevent dead-code elimination
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures a single `get` of an inserted key on a heap filled with `n` random entries.
+fn bench_get_existing_after_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("get_existing_after_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        if n == 0 {
+            continue; // an empty heap has no existing key to pick
+        }
+
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || {
+                    let (heap, entries) = create_filled_heap(&mut rng, n);
+                    let entry = entries.choose(&mut rng).unwrap().clone();
+                    (heap, entry)
+                },
+                |(heap, entry)| {
+                    criterion::black_box(heap.get(&entry.k)); // prevent dead-code elimination
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures a single `get` of a freshly drawn random key on a heap filled with `n` entries.
+fn bench_get_new_after_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("get_new_after_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || {
+                    let (heap, _entries) = create_filled_heap(&mut rng, n);
+                    let entry = Entry::new_random(&mut rng);
+                    (heap, entry)
+                },
+                |(heap, entry)| {
+                    criterion::black_box(heap.get(&entry.k)); // prevent dead-code elimination
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures calling `pop` `n` times on a heap that was filled with `n` random entries.
+fn bench_pop_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("pop_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || create_filled_heap(&mut rng, n).0,
+                |mut heap| {
+                    for _ in 0..n {
+                        heap.pop();
+                    }
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures a single `remove` of an inserted key on a heap filled with `n` random entries.
+fn bench_remove_existing_after_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("remove_existing_after_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        if n == 0 {
+            continue; // an empty heap has no existing key to pick
+        }
+
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || {
+                    let (heap, entries) = create_filled_heap(&mut rng, n);
+                    let entry = entries.choose(&mut rng).unwrap().clone();
+                    (heap, entry)
+                },
+                |(mut heap, entry)| {
+                    heap.remove(&entry.k);
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures a single `remove` of a freshly drawn random key on a heap filled with `n` entries.
+fn bench_remove_new_after_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("remove_new_after_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || {
+                    let (heap, _entries) = create_filled_heap(&mut rng, n);
+                    let entry = Entry::new_random(&mut rng);
+                    (heap, entry)
+                },
+                |(mut heap, entry)| {
+                    heap.remove(&entry.k);
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+/// Measures one `insert` of an already inserted key, with a freshly drawn `o`, at heap size `n`.
+fn bench_replace_after_n_elements(c: &mut Criterion) {
+    let mut g = c.benchmark_group("replace_after_n_elements");
+    setup_group(&mut g);
+    let mut rng = thread_rng();
+
+    for &n in TEST_SIZES {
+        if n == 0 {
+            continue; // an empty heap has no existing key to pick
+        }
+
+        g.bench_with_input(BenchmarkId::from_parameter(n), &n, |b, &n| {
+            b.iter_batched(
+                || {
+                    let (heap, entries) = create_filled_heap(&mut rng, n);
+                    let entry = entries.choose(&mut rng).unwrap().clone();
+                    let entry = Entry {
+                        k: entry.k,
+                        o: Entry::new_random(&mut rng).o,
+                    };
+                    (heap, entry)
+                },
+                |(mut heap, entry)| {
+                    heap.insert(entry.k, Payload::default(), entry.o);
+
+                    // return the heap so criterion drops it outside the timed section
+                    heap
+                },
+                BatchSize::LargeInput,
+            );
+        });
+    }
+
+    g.finish();
+}
+
+criterion_group! {
+    name = benches;
+    config = Criterion::default(); // stock settings; per-group tweaks live in `setup_group`
+    targets =
+        bench_insert_n_elements,
+        bench_peek_after_n_elements,
+        bench_get_existing_after_n_elements,
+        bench_get_new_after_n_elements,
+        bench_pop_n_elements,
+        bench_remove_existing_after_n_elements,
+        bench_remove_new_after_n_elements,
+        bench_replace_after_n_elements,
+}
+criterion_main!(benches); // provides `main`, since the bench target sets `harness = false`
diff --git a/querier/src/cache_system/backend/mod.rs b/querier/src/cache_system/backend/mod.rs
index 3b5fa449bf..84f6bff425 100644
--- a/querier/src/cache_system/backend/mod.rs
+++ b/querier/src/cache_system/backend/mod.rs
@@ -1,6 +1,6 @@
 use std::{any::Any, fmt::Debug, hash::Hash};
 
-mod addressable_heap;
+pub mod addressable_heap;
 pub mod dual;
 pub mod hash_map;
 pub mod ttl;
diff --git a/querier/src/lib.rs b/querier/src/lib.rs
index ed2edba79d..2f2efc2173 100644
--- a/querier/src/lib.rs
+++ b/querier/src/lib.rs
@@ -32,3 +32,6 @@ pub use ingester::{
 };
 pub use namespace::QuerierNamespace;
 pub use server::QuerierServer;
+
+// re-exported for `benches/addressable_heap.rs`: benchmarks only see this crate's public API
+pub use cache_system::backend::addressable_heap::AddressableHeap;