Merge branch 'main' into cn/test-refactor

pull/24376/head
kodiakhq[bot] 2022-11-17 14:01:36 +00:00 committed by GitHub
commit 1a49fa4864
9 changed files with 205 additions and 66 deletions

Cargo.lock (generated)

@@ -647,9 +647,9 @@ dependencies = [
 [[package]]
 name = "clap"
-version = "4.0.25"
+version = "4.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "389ca505fd2c00136e0d0cd34bcd8b6bd0b59d5779aab396054b716334230c1c"
+checksum = "2148adefda54e14492fb9bddcc600b4344c5d1a3123bd666dcb939c6f0e0e57e"
 dependencies = [
  "atty",
  "bitflags",
@@ -664,7 +664,7 @@ dependencies = [
 name = "clap_blocks"
 version = "0.1.0"
 dependencies = [
- "clap 4.0.25",
+ "clap 4.0.26",
  "data_types",
  "futures",
  "humantime",
@@ -1341,9 +1341,9 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
 [[package]]
 name = "digest"
-version = "0.10.5"
+version = "0.10.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c"
+checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f"
 dependencies = [
  "block-buffer",
  "crypto-common",
@@ -1674,7 +1674,7 @@ version = "0.1.0"
 dependencies = [
  "chrono",
  "chrono-english",
- "clap 4.0.25",
+ "clap 4.0.26",
  "clap_blocks",
  "data_types",
  "filetime",
@@ -2172,7 +2172,7 @@ dependencies = [
  "assert_cmd",
  "backtrace",
  "bytes",
- "clap 4.0.25",
+ "clap 4.0.26",
  "clap_blocks",
  "comfy-table",
  "compactor",
@@ -2438,7 +2438,7 @@ dependencies = [
  "bytes",
  "chrono",
  "chrono-english",
- "clap 4.0.25",
+ "clap 4.0.26",
  "criterion",
  "datafusion_util",
  "futures",
@@ -2536,7 +2536,7 @@ dependencies = [
  "async-trait",
  "bytes",
  "chrono",
- "clap 4.0.25",
+ "clap 4.0.26",
  "clap_blocks",
  "data_types",
  "flate2",
@@ -2692,7 +2692,7 @@ name = "ioxd_test"
 version = "0.1.0"
 dependencies = [
  "async-trait",
- "clap 4.0.25",
+ "clap 4.0.26",
  "hyper",
  "ioxd_common",
  "metric",
@@ -4226,9 +4226,9 @@ dependencies = [
 [[package]]
 name = "reqwest"
-version = "0.11.12"
+version = "0.11.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "431949c384f4e2ae07605ccaa56d1d9d2ecdb5cadd4f9577ccfab29f2e5149fc"
+checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c"
 dependencies = [
  "base64",
  "bytes",
@@ -5551,7 +5551,7 @@ version = "0.1.0"
 dependencies = [
  "async-trait",
  "chrono",
- "clap 4.0.25",
+ "clap 4.0.26",
  "futures",
  "observability_deps",
  "snafu",
@@ -5690,7 +5690,7 @@ name = "trogging"
 version = "0.1.0"
 dependencies = [
  "atty",
- "clap 4.0.25",
+ "clap 4.0.26",
  "logfmt",
  "observability_deps",
  "regex",


@@ -13,7 +13,6 @@ arrow = { workspace = true, features = ["prettyprint", "dyn_cmp_dict"] }
 # used by arrow anyway (needed for printing workaround)
 chrono = { version = "0.4", default-features = false }
 comfy-table = { version = "6.1", default-features = false }
-datafusion = { workspace = true }
 hashbrown = { workspace = true }
 num-traits = "0.2"
 snafu = "0.7"
@@ -21,4 +20,5 @@ workspace-hack = { path = "../workspace-hack"}

 [dev-dependencies]
 arrow-flight = { workspace = true }
+datafusion = { workspace = true }
 rand = "0.8.3"


@@ -31,8 +31,11 @@ pub enum ParseError {
     #[snafu(display("error building response: {:?}", source))]
     ResponseError { source: response::Error },

-    #[snafu(display("value {:?} not supported for flag {:?}", value, flag))]
-    UnsupportedFlagValue { value: String, flag: String },
+    #[snafu(display(
+        "value {} not supported for format. Expected 'pretty' or 'quiet'",
+        value
+    ))]
+    UnsupportedFormat { value: String },

     #[snafu(display("unsupported aggregate type: '{:?}'", agg))]
     Aggregate { agg: String },
@@ -144,9 +147,8 @@ fn parse_format(format: &str) -> Result<Format, ParseError> {
         "pretty" => Ok(Format::Pretty),
         "quiet" => Ok(Format::Quiet),
         // TODO - raw frame format?
-        _ => Err(ParseError::UnsupportedFlagValue {
+        _ => Err(ParseError::UnsupportedFormat {
             value: format.to_owned(),
-            flag: "format".to_owned(),
         }),
     }
 }
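
Editorial note on the rename above: dropping the `flag` field folds the flag name into the snafu display string, so callers match on `UnsupportedFormat` alone. A minimal, self-contained sketch (not taken from the repository; the `Format` enum is abridged here to the two accepted values) of how the new variant behaves:

    use snafu::Snafu;

    #[derive(Debug, Snafu)]
    enum ParseError {
        #[snafu(display(
            "value {} not supported for format. Expected 'pretty' or 'quiet'",
            value
        ))]
        UnsupportedFormat { value: String },
    }

    #[derive(Debug, PartialEq)]
    enum Format {
        Pretty,
        Quiet,
    }

    fn parse_format(format: &str) -> Result<Format, ParseError> {
        match format {
            "pretty" => Ok(Format::Pretty),
            "quiet" => Ok(Format::Quiet),
            _ => Err(ParseError::UnsupportedFormat {
                value: format.to_owned(),
            }),
        }
    }

    fn main() {
        assert_eq!(parse_format("pretty").unwrap(), Format::Pretty);
        // The flag name is now part of the message rather than a second field.
        let err = parse_format("csv").unwrap_err();
        assert_eq!(
            err.to_string(),
            "value csv not supported for format. Expected 'pretty' or 'quiet'"
        );
    }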


@@ -79,26 +79,6 @@ pub enum Error {
     #[snafu(display("Shard {} not found in data map", shard_id))]
     ShardNotFound { shard_id: ShardId },

-    #[snafu(display("Namespace {} not found in catalog", namespace))]
-    NamespaceNotFound { namespace: String },
-
-    #[snafu(display("Table {} not found in buffer", table_name))]
-    TableNotFound { table_name: String },
-
-    #[snafu(display("Error accessing catalog: {}", source))]
-    Catalog {
-        source: iox_catalog::interface::Error,
-    },
-
-    #[snafu(display("Snapshot error: {}", source))]
-    Snapshot { source: mutable_batch::Error },
-
-    #[snafu(display("Error while filtering columns from snapshot: {}", source))]
-    FilterColumn { source: arrow::error::ArrowError },
-
-    #[snafu(display("Error while copying buffer to snapshot: {}", source))]
-    BufferToSnapshot { source: mutable_batch::Error },
-
     #[snafu(display("Error adding to buffer in mutable batch: {}", source))]
     BufferWrite { source: mutable_batch::Error },
 }


@@ -1001,7 +1001,7 @@ mod tests {
                 Ok(DmlOperation::Write(make_write(2222, 2)))
             ]],
             sink_rets = [
-                Err(crate::data::Error::NamespaceNotFound{namespace: "bananas".to_string() }),
+                Err(crate::data::Error::ShardNotFound{shard_id: ShardId::new(42)}),
                 Ok(DmlApplyAction::Applied(true)),
             ],
             want_ttbr = 2,


@@ -239,7 +239,7 @@ mod tests {
     use std::sync::Arc;

     use assert_matches::assert_matches;
-    use data_types::{NamespaceId, Sequence, SequenceNumber, TableId};
+    use data_types::{NamespaceId, Sequence, SequenceNumber, ShardId, TableId};
     use dml::{DmlMeta, DmlWrite};
     use iox_time::Time;
     use metric::{Metric, MetricObserver, Observation};
@@ -417,13 +417,13 @@ mod tests {
         let got = test(
             op,
             &metrics,
-            Err(crate::data::Error::NamespaceNotFound {
-                namespace: "bananas".to_string(),
+            Err(crate::data::Error::ShardNotFound {
+                shard_id: ShardId::new(42),
             }),
             Some(12345),
         )
         .await;
-        assert_matches!(got, Err(crate::data::Error::NamespaceNotFound { .. }));
+        assert_matches!(got, Err(crate::data::Error::ShardNotFound { .. }));

         // Validate the various write buffer metrics
         assert_matches!(


@@ -1,4 +1,7 @@
-use std::sync::Arc;
+use std::{
+    sync::{Arc, Barrier},
+    time::Instant,
+};

 use criterion::{
     criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, Throughput,
@@ -6,7 +9,7 @@ use criterion::{
 use data_types::NamespaceName;
 use mutable_batch::MutableBatch;
 use rand::{distributions::Alphanumeric, thread_rng, Rng};
-use sharder::{JumpHash, Sharder};
+use sharder::{JumpHash, RoundRobin, Sharder};

 fn get_random_string(length: usize) -> String {
     thread_rng()
@@ -17,82 +20,138 @@ fn get_random_string(length: usize) -> String {
 }

 fn sharder_benchmarks(c: &mut Criterion) {
-    let mut group = c.benchmark_group("sharder");
+    benchmark_impl(c, "jumphash", |num_buckets| {
+        JumpHash::new((0..num_buckets).map(Arc::new))
+    });
+
+    benchmark_impl(c, "round_robin", |num_buckets| {
+        RoundRobin::new((0..num_buckets).map(Arc::new))
+    });
+}
+
+fn benchmark_impl<T, F>(c: &mut Criterion, name: &str, init: F)
+where
+    T: Sharder<MutableBatch>,
+    F: Fn(usize) -> T,
+{
+    let mut group = c.benchmark_group(name);

     // benchmark sharder with fixed table name and namespace, with varying number of buckets
-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        1_000,
         "basic 1k buckets",
         "table",
         &NamespaceName::try_from("namespace").unwrap(),
+        init(1_000),
     );

-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        10_000,
         "basic 10k buckets",
         "table",
         &NamespaceName::try_from("namespace").unwrap(),
+        init(10_000),
     );

-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        100_000,
         "basic 100k buckets",
         "table",
         &NamespaceName::try_from("namespace").unwrap(),
+        init(100_000),
     );

-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        1_000_000,
         "basic 1M buckets",
         "table",
         &NamespaceName::try_from("namespace").unwrap(),
+        init(1_000_000),
     );

     // benchmark sharder with random table name and namespace of length 16
-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        10_000,
         "random with key-length 16",
         get_random_string(16).as_str(),
         &NamespaceName::try_from(get_random_string(16)).unwrap(),
+        init(10_000),
     );

     // benchmark sharder with random table name and namespace of length 32
-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        10_000,
         "random with key-length 32",
         get_random_string(32).as_str(),
         &NamespaceName::try_from(get_random_string(32)).unwrap(),
+        init(10_000),
     );

     // benchmark sharder with random table name and namespace of length 64
-    benchmark_sharder(
+    benchmark_scenario(
         &mut group,
-        10_000,
         "random with key-length 64",
         get_random_string(64).as_str(),
         &NamespaceName::try_from(get_random_string(64)).unwrap(),
+        init(10_000),
     );

     group.finish();
 }

-fn benchmark_sharder(
+fn benchmark_scenario<T>(
     group: &mut BenchmarkGroup<WallTime>,
-    num_buckets: usize,
     bench_name: &str,
     table: &str,
     namespace: &NamespaceName<'_>,
-) {
-    let hasher = JumpHash::new((0..num_buckets).map(Arc::new));
+    sharder: T,
+) where
+    T: Sharder<MutableBatch>,
+{
     let batch = MutableBatch::default();

     group.throughput(Throughput::Elements(1));
     group.bench_function(bench_name, |b| {
         b.iter(|| {
-            hasher.shard(table, namespace, &batch);
+            sharder.shard(table, namespace, &batch);
+        });
+    });
+
+    const N_THREADS: usize = 10;
+
+    // Run the same test with N contending threads.
+    //
+    // Note that this includes going through pointer indirection for each shard
+    // op due to the Arc.
+    let sharder = Arc::new(sharder);
+    group.bench_function(format!("{bench_name}_{N_THREADS}_threads"), |b| {
+        b.iter_custom(|iters| {
+            let sharder = Arc::clone(&sharder);
+            std::thread::scope(|s| {
+                let barrier = Arc::new(Barrier::new(N_THREADS));
+                // Spawn N-1 threads that wait for the last thread to spawn
+                for _ in 0..(N_THREADS - 1) {
+                    let sharder = Arc::clone(&sharder);
+                    let barrier = Arc::clone(&barrier);
+                    s.spawn(move || {
+                        let batch = MutableBatch::default();
+                        barrier.wait();
+                        for _ in 0..iters {
+                            sharder.shard(table, namespace, &batch);
+                        }
+                    });
+                }

+                // Spawn the Nth thread that performs the same sharding ops, but
+                // measures the duration of time taken.
+                s.spawn(move || {
+                    let batch = MutableBatch::default();
+                    barrier.wait();
+                    let start = Instant::now();
+                    for _ in 0..iters {
+                        sharder.shard(table, namespace, &batch);
+                    }
+                    start.elapsed()
+                })
+                .join()
+                .unwrap()
+            })
         });
     });
 }
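
For readers unfamiliar with Criterion's custom-timing API used above: the `iter_custom` closure receives an iteration count and must return the total wall-clock `Duration` spent on those iterations, so only the Nth thread's elapsed time is reported while the other N-1 threads exist purely to generate contention. A standalone sketch of that contract (illustrative benchmark name and workload, not from this commit):

    use std::time::{Duration, Instant};

    use criterion::{black_box, criterion_group, criterion_main, Criterion};

    fn bench_custom_timing(c: &mut Criterion) {
        c.bench_function("custom_timing_example", |b| {
            // iter_custom hands us `iters`; we time the hot loop ourselves and
            // return the total Duration, which Criterion divides by `iters`.
            b.iter_custom(|iters| -> Duration {
                let start = Instant::now();
                for i in 0..iters {
                    black_box(i.wrapping_mul(31));
                }
                start.elapsed()
            })
        });
    }

    criterion_group!(benches, bench_custom_timing);
    criterion_main!(benches);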


@@ -20,6 +20,9 @@
 mod r#trait;
 pub use r#trait::*;

+mod round_robin;
+pub use round_robin::*;
+
 mod jumphash;
 pub use jumphash::*;


@@ -0,0 +1,95 @@
+use std::{cell::RefCell, fmt::Debug, sync::Arc};
+
+use crate::Sharder;
+
+thread_local! {
+    /// A per-thread counter incremented once per call to
+    /// [`RoundRobin::next()`].
+    static COUNTER: RefCell<usize> = RefCell::new(0);
+}
+
+/// A round-robin sharder (with no data locality) that arbitrarily maps to `T`
+/// with an approximately uniform distribution.
+///
+/// # Distribution
+///
+/// Requests are distributed uniformly across all shards **per thread**. Given
+/// enough requests (where `N` is significantly larger than the number of
+/// threads) an approximately uniform distribution is achieved.
+#[derive(Debug)]
+pub struct RoundRobin<T> {
+    shards: Vec<T>,
+}
+
+impl<T> RoundRobin<T> {
+    /// Construct a new [`RoundRobin`] sharder that maps requests to each of
+    /// `shards`.
+    pub fn new(shards: impl IntoIterator<Item = T>) -> Self {
+        Self {
+            shards: shards.into_iter().collect(),
+        }
+    }
+
+    /// Return the next `T` to be used.
+    pub fn next(&self) -> &T {
+        // Grab and increment the current counter.
+        let counter = COUNTER.with(|cell| {
+            let mut cell = cell.borrow_mut();
+            let new_value = cell.wrapping_add(1);
+            *cell = new_value;
+            new_value
+        });
+
+        // Reduce it to the range of [0, N) where N is the number of shards in
+        // this sharder.
+        let idx = counter % self.shards.len();
+
+        self.shards.get(idx).expect("mapped to out-of-bounds shard")
+    }
+}
+
+impl<T, U> Sharder<U> for RoundRobin<Arc<T>>
+where
+    T: Send + Sync + Debug,
+    U: Send + Sync + Debug,
+{
+    type Item = Arc<T>;
+
+    fn shard(
+        &self,
+        _table: &str,
+        _namespace: &data_types::NamespaceName<'_>,
+        _payload: &U,
+    ) -> Self::Item {
+        Arc::clone(self.next())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use super::*;
+
+    // Note this is a property test that asserts the round-robin nature of the
+    // returned results, not the values themselves.
+    #[test]
+    fn test_round_robin() {
+        // Initialise sharder with a set of 5 shards
+        let shards = ["s1", "s2", "s3", "s4", "s5"];
+        let sharder = RoundRobin::new(shards.iter().map(Arc::new));
+
+        // Request the first N mappings.
+        #[allow(clippy::needless_collect)] // Incorrect lint
+        let mappings = (0..shards.len())
+            .map(|_| sharder.next())
+            .collect::<Vec<_>>();
+
+        // Request another 100 shard mappings, and ensure the shards are
+        // yielded in round-robin fashion (matching the initial shard
+        // mappings)
+        for want in mappings.into_iter().cycle().take(100) {
+            assert_eq!(want, sharder.next());
+        }
+    }
+}
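
A usage sketch for the new sharder (editorial, not part of the commit; it assumes the `data_types`, `mutable_batch`, and `sharder` workspace crates shown in the diffs above): because `RoundRobin` ignores the table and namespace keys, repeated identical writes rotate across shards rather than pinning to one, which is the property the contended benchmark exercises.

    use std::sync::Arc;

    use data_types::NamespaceName;
    use mutable_batch::MutableBatch;
    use sharder::{RoundRobin, Sharder};

    fn main() {
        // Any Send + Sync + Debug value behind an Arc can act as a shard handle.
        let sharder: RoundRobin<Arc<i32>> = RoundRobin::new((0..3).map(Arc::new));

        let namespace = NamespaceName::try_from("bananas").unwrap();
        let batch = MutableBatch::default();

        // Identical keys still advance the per-thread counter, so consecutive
        // calls map to different shards (for any sharder with >1 shard).
        let first = sharder.shard("example_table", &namespace, &batch);
        let second = sharder.shard("example_table", &namespace, &batch);
        assert_ne!(first, second);
    }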