2022-10-26 14:24:49 +00:00
|
|
|
use std::{iter, sync::Arc};
|
|
|
|
|
2022-03-04 00:28:02 +00:00
|
|
|
use criterion::{
|
|
|
|
criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup, Criterion,
|
|
|
|
Throughput,
|
|
|
|
};
|
2023-05-08 18:25:19 +00:00
|
|
|
use data_types::NamespaceName;
|
2022-03-04 00:28:02 +00:00
|
|
|
use hashbrown::HashMap;
|
2023-04-11 13:54:06 +00:00
|
|
|
use iox_catalog::{interface::Catalog, mem::MemCatalog};
|
2022-03-04 00:28:02 +00:00
|
|
|
use mutable_batch::MutableBatch;
|
2022-06-29 11:27:43 +00:00
|
|
|
use once_cell::sync::Lazy;
|
2022-05-06 18:51:52 +00:00
|
|
|
use router::{
|
2022-03-04 00:28:02 +00:00
|
|
|
dml_handlers::{DmlHandler, SchemaValidator},
|
2023-05-08 18:25:19 +00:00
|
|
|
namespace_cache::{MemoryNamespaceCache, NamespaceCache, ReadThroughCache, ShardedCache},
|
2022-03-04 00:28:02 +00:00
|
|
|
};
|
2022-11-02 18:15:04 +00:00
|
|
|
use schema::Projection;
|
2022-03-04 00:28:02 +00:00
|
|
|
use tokio::runtime::Runtime;
|
|
|
|
|
2022-11-10 14:13:59 +00:00
|
|
|
// Namespace name passed to every `get_schema` and `write` call in this benchmark.
// Built lazily on first access from the literal "bananas"; the `unwrap` panics if
// the conversion into a `NamespaceName` fails.
static NAMESPACE: Lazy<NamespaceName<'static>> = Lazy::new(|| "bananas".try_into().unwrap());
|
2022-03-04 00:28:02 +00:00
|
|
|
|
|
|
|
fn runtime() -> Runtime {
|
|
|
|
tokio::runtime::Builder::new_current_thread()
|
|
|
|
.build()
|
|
|
|
.unwrap()
|
|
|
|
}
|
|
|
|
|
test(bench): router partitioner
Adds a benchmark that exercises the router's partitioning DmlHandler
implementation against a set of three files (very small, small, medium)
with 4 different partitioning schemes:
* Single tag, which occurs in all rows
* Single tag, which does not occur in any row
* Default strftime formatter (YYYY-MM-DD)
* Long and complicated strftime formatter
This covers the entire partitioning overhead - building the formatters,
evaluating each row, grouping the values into per-partition buckets, and
returning to the caller, where it normally would be passed to the next
handler in the pipeline.
Note that only one template part is evaluated in each case - this
measures the overhead of each type of formatter. In reality, we'd expect
partitioning with custom schemes to utilise more than one part,
increasing the cost of partitioning proportionally. This is a
lower-bound measurement!
2023-06-02 14:04:09 +00:00
|
|
|
fn schema_validator_benchmarks(c: &mut Criterion) {
|
2022-03-04 00:28:02 +00:00
|
|
|
let mut group = c.benchmark_group("schema_validator");
|
|
|
|
|
|
|
|
bench(&mut group, 1, 1);
|
|
|
|
|
|
|
|
bench(&mut group, 1, 100);
|
|
|
|
bench(&mut group, 1, 10000);
|
|
|
|
|
|
|
|
bench(&mut group, 100, 1);
|
|
|
|
bench(&mut group, 10000, 1);
|
|
|
|
|
|
|
|
group.finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
fn bench(group: &mut BenchmarkGroup<WallTime>, tables: usize, columns_per_table: usize) {
|
|
|
|
let metrics = Arc::new(metric::Registry::default());
|
|
|
|
|
2023-04-11 13:54:06 +00:00
|
|
|
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
|
|
|
let ns_cache = Arc::new(ReadThroughCache::new(
|
|
|
|
Arc::new(ShardedCache::new(
|
|
|
|
iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
|
|
|
|
)),
|
|
|
|
Arc::clone(&catalog),
|
2022-08-24 11:16:04 +00:00
|
|
|
));
|
2023-05-08 18:25:19 +00:00
|
|
|
let validator = SchemaValidator::new(catalog, Arc::clone(&ns_cache), &metrics);
|
2022-03-04 00:28:02 +00:00
|
|
|
|
|
|
|
for i in 0..65_000 {
|
|
|
|
let write = lp_to_writes(format!("{}{}", i + 10_000_000, generate_lp(1, 1)).as_str());
|
2023-05-08 18:25:19 +00:00
|
|
|
let namespace_schema = runtime().block_on(ns_cache.get_schema(&NAMESPACE)).unwrap();
|
|
|
|
let _ = runtime().block_on(validator.write(&NAMESPACE, namespace_schema, write, None));
|
2022-03-04 00:28:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let write = lp_to_writes(&generate_lp(tables, columns_per_table));
|
|
|
|
let column_count = write
|
|
|
|
.values()
|
2022-11-02 18:15:04 +00:00
|
|
|
.fold(0, |acc, b| acc + b.schema(Projection::All).unwrap().len());
|
2022-03-04 00:28:02 +00:00
|
|
|
|
|
|
|
group.throughput(Throughput::Elements(column_count as _));
|
|
|
|
group.bench_function(format!("{tables}x{columns_per_table}"), |b| {
|
|
|
|
b.to_async(runtime()).iter_batched(
|
2023-05-08 18:25:19 +00:00
|
|
|
|| {
|
|
|
|
(
|
|
|
|
write.clone(),
|
|
|
|
runtime().block_on(ns_cache.get_schema(&NAMESPACE)).unwrap(),
|
|
|
|
)
|
|
|
|
},
|
|
|
|
|(write, namespace_schema)| validator.write(&NAMESPACE, namespace_schema, write, None),
|
2022-03-04 00:28:02 +00:00
|
|
|
BatchSize::SmallInput,
|
|
|
|
);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Generates line protocol containing one line per table.
///
/// Line `t` (for `t` in `0..tables`) has the form
/// `table{t},tag=A val0=42i,val1=42i,...` with `columns_per_table` integer
/// fields. Lines are separated by `\n` with no trailing newline, so
/// `tables == 0` yields an empty string.
fn generate_lp(tables: usize, columns_per_table: usize) -> String {
    // The field list is the same for every table, so render it once rather
    // than once per table.
    let fields = (0..columns_per_table)
        .map(|col| format!("val{col}=42i"))
        .collect::<Vec<_>>()
        .join(",");

    (0..tables)
        .map(|table| format!("table{table},tag=A {fields}"))
        .collect::<Vec<_>>()
        .join("\n")
}
|
|
|
|
|
|
|
|
// Parse `lp` into a table-keyed MutableBatch map.
|
|
|
|
fn lp_to_writes(lp: &str) -> HashMap<String, MutableBatch> {
|
|
|
|
let (writes, _) = mutable_batch_lp::lines_to_batches_stats(lp, 42)
|
|
|
|
.expect("failed to build test writes from LP");
|
|
|
|
writes
|
|
|
|
}
|
|
|
|
|
test(bench): router partitioner
Adds a benchmark that exercises the router's partitioning DmlHandler
implementation against a set of three files (very small, small, medium)
with 4 different partitioning schemes:
* Single tag, which occurs in all rows
* Single tag, which does not occur in any row
* Default strftime formatter (YYYY-MM-DD)
* Long and complicated strftime formatter
This covers the entire partitioning overhead - building the formatters,
evaluating each row, grouping the values into per-partition buckets, and
returning to the caller, where it normally would be passed to the next
handler in the pipeline.
Note that only one template part is evaluated in each case - this
measures the overhead of each type of formatter. In reality, we'd expect
partitioning with custom schemes to utilise more than one part,
increasing the cost of partitioning proportionally. This is a
lower-bound measurement!
2023-06-02 14:04:09 +00:00
|
|
|
// Declares the Criterion group `benches`, containing `schema_validator_benchmarks`.
criterion_group!(benches, schema_validator_benchmarks);
|
2022-03-04 00:28:02 +00:00
|
|
|
// Expands to the benchmark binary's `main`, which runs the `benches` group.
criterion_main!(benches);
|