2021-05-05 16:59:12 +00:00
|
|
|
use arrow::{
|
2021-01-25 21:06:04 +00:00
|
|
|
array::{ArrayRef, Int64Array, StringArray},
|
|
|
|
record_batch::RecordBatch,
|
|
|
|
};
|
2021-07-13 01:49:48 +00:00
|
|
|
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
|
2021-03-19 16:27:57 +00:00
|
|
|
use internal_types::schema::builder::SchemaBuilder;
|
2021-06-11 16:25:33 +00:00
|
|
|
use read_buffer::{BinaryExpr, ChunkMetrics, Predicate, RBChunk};
|
2021-07-13 01:49:48 +00:00
|
|
|
use std::sync::Arc;
|
2021-01-25 21:06:04 +00:00
|
|
|
|
|
|
|
/// Timestamp given to the first row produced by `generate_row_group`; every
/// later row is offset from it by a multiple of `ONE_MS`.
///
/// NOTE(review): presumably nanoseconds since the Unix epoch - confirm.
const BASE_TIME: i64 = 1_351_700_038_292_387_000;
|
|
|
|
/// Spacing between the timestamps of consecutive generated rows.
///
/// NOTE(review): the name suggests one millisecond expressed in nanoseconds -
/// confirm the timestamp unit.
const ONE_MS: i64 = 1_000_000;
|
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
fn satisfies_predicate(c: &mut Criterion) {
|
2021-01-25 21:06:04 +00:00
|
|
|
let rb = generate_row_group(500_000);
|
2021-07-26 20:48:03 +00:00
|
|
|
let chunk = RBChunk::new("table_a", rb, ChunkMetrics::new_unregistered());
|
2021-01-25 21:06:04 +00:00
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
// no predicate
|
|
|
|
benchmark_satisfies_predicate(
|
2021-01-25 21:06:04 +00:00
|
|
|
c,
|
2021-06-29 10:47:48 +00:00
|
|
|
"database_satisfies_predicate_all_tables",
|
2021-04-07 21:38:04 +00:00
|
|
|
&chunk,
|
2021-01-25 21:06:04 +00:00
|
|
|
Predicate::default(),
|
2021-06-29 10:47:48 +00:00
|
|
|
true,
|
2021-01-25 21:06:04 +00:00
|
|
|
);
|
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
// predicate but meta-data rules out matches
|
|
|
|
benchmark_satisfies_predicate(
|
2021-01-25 21:06:04 +00:00
|
|
|
c,
|
2021-06-29 10:47:48 +00:00
|
|
|
"database_satisfies_predicate_meta_pred_no_match",
|
2021-04-07 21:38:04 +00:00
|
|
|
&chunk,
|
2021-01-25 21:06:04 +00:00
|
|
|
Predicate::new(vec![BinaryExpr::from(("env", "=", "zoo"))]),
|
2021-06-29 10:47:48 +00:00
|
|
|
false,
|
2021-01-25 21:06:04 +00:00
|
|
|
);
|
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
// predicate - single expression matches at least one row
|
|
|
|
benchmark_satisfies_predicate(
|
2021-01-25 21:06:04 +00:00
|
|
|
c,
|
2021-06-29 10:47:48 +00:00
|
|
|
"database_satisfies_predicate_single_pred_match",
|
2021-04-07 21:38:04 +00:00
|
|
|
&chunk,
|
2021-01-25 21:06:04 +00:00
|
|
|
Predicate::new(vec![BinaryExpr::from(("env", "=", "prod"))]),
|
2021-06-29 10:47:48 +00:00
|
|
|
true,
|
2021-01-25 21:06:04 +00:00
|
|
|
);
|
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
// predicate - at least one row matches all expressions
|
|
|
|
benchmark_satisfies_predicate(
|
2021-01-25 21:06:04 +00:00
|
|
|
c,
|
2021-06-29 10:47:48 +00:00
|
|
|
"database_satisfies_predicate_multi_pred_match",
|
2021-04-07 21:38:04 +00:00
|
|
|
&chunk,
|
2021-01-25 21:06:04 +00:00
|
|
|
Predicate::new(vec![
|
|
|
|
BinaryExpr::from(("env", "=", "prod")),
|
|
|
|
BinaryExpr::from(("time", ">=", BASE_TIME)),
|
|
|
|
BinaryExpr::from(("time", "<", BASE_TIME + (ONE_MS * 10000))),
|
|
|
|
]),
|
2021-06-29 10:47:48 +00:00
|
|
|
true,
|
2021-01-25 21:06:04 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
fn benchmark_satisfies_predicate(
|
2021-01-25 21:06:04 +00:00
|
|
|
c: &mut Criterion,
|
|
|
|
bench_name: &str,
|
2021-06-11 16:25:33 +00:00
|
|
|
chunk: &RBChunk,
|
2021-01-25 21:06:04 +00:00
|
|
|
predicate: Predicate,
|
2021-06-29 10:47:48 +00:00
|
|
|
satisfies: bool,
|
2021-01-25 21:06:04 +00:00
|
|
|
) {
|
|
|
|
c.bench_function(bench_name, |b| {
|
|
|
|
b.iter_batched(
|
|
|
|
|| predicate.clone(), // don't want to time predicate cloning
|
|
|
|
|predicate: Predicate| {
|
2021-06-29 10:47:48 +00:00
|
|
|
assert_eq!(chunk.satisfies_predicate(&predicate), satisfies);
|
2021-01-25 21:06:04 +00:00
|
|
|
},
|
|
|
|
BatchSize::SmallInput,
|
|
|
|
);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
// generate a row group with three columns of varying cardinality.
|
|
|
|
fn generate_row_group(rows: usize) -> RecordBatch {
|
|
|
|
let schema = SchemaBuilder::new()
|
|
|
|
.non_null_tag("env")
|
|
|
|
.non_null_tag("container_id")
|
|
|
|
.timestamp()
|
|
|
|
.build()
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
let container_ids = (0..rows)
|
|
|
|
.into_iter()
|
|
|
|
.map(|i| format!("my_container_{:?}", i))
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
|
|
let data: Vec<ArrayRef> = vec![
|
|
|
|
// sorted 2 cardinality column
|
|
|
|
Arc::new(StringArray::from(
|
|
|
|
(0..rows)
|
|
|
|
.into_iter()
|
|
|
|
.map(|i| if i < rows / 2 { "prod" } else { "dev" })
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
)),
|
|
|
|
// completely unique cardinality column
|
|
|
|
Arc::new(StringArray::from(
|
|
|
|
container_ids
|
|
|
|
.iter()
|
|
|
|
.map(|id| id.as_str())
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
)),
|
|
|
|
// ms increasing time column;
|
|
|
|
Arc::new(Int64Array::from(
|
|
|
|
(0..rows)
|
|
|
|
.into_iter()
|
|
|
|
.map(|i| BASE_TIME + (i as i64 * ONE_MS))
|
|
|
|
.collect::<Vec<_>>(),
|
|
|
|
)),
|
|
|
|
];
|
|
|
|
|
|
|
|
RecordBatch::try_new(schema.into(), data).unwrap()
|
|
|
|
}
|
|
|
|
|
2021-06-29 10:47:48 +00:00
|
|
|
// Collect the benchmark functions of this file into a group named `benches`.
criterion_group!(benches, satisfies_predicate);
|
2021-01-25 21:06:04 +00:00
|
|
|
// Generate the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);
|