fix: Change the sharder to return error instead of panicking for no shards

pull/24376/head
Carol (Nichols || Goulding) 2022-06-15 11:09:57 -04:00
parent e9cdaffe74
commit 03f6f59a9b
No known key found for this signature in database
GPG Key ID: E907EE5A736F87D4
12 changed files with 110 additions and 77 deletions

Cargo.lock (generated)
View File

@@ -4945,6 +4945,8 @@ dependencies = [
  "parking_lot 0.12.1",
  "rand",
  "siphasher",
+ "snafu",
+ "test_helpers",
  "workspace-hack",
 ]

View File

@@ -63,7 +63,7 @@ mod tests {
     async fn test_get_namespaces_empty() {
         let catalog = TestCatalog::new();
-        // QuerierDatabase::new panics if there are no sequencers in the catalog
+        // QuerierDatabase::new returns an error if there are no sequencers in the catalog
         catalog.create_sequencer(0).await;

         let catalog_cache = Arc::new(QuerierCatalogCache::new(
@@ -98,7 +98,7 @@ mod tests {
     async fn test_get_namespaces() {
         let catalog = TestCatalog::new();
-        // QuerierDatabase::new panics if there are no sequencers in the catalog
+        // QuerierDatabase::new returns an error if there are no sequencers in the catalog
         catalog.create_sequencer(0).await;

         let catalog_cache = Arc::new(QuerierCatalogCache::new(

View File

@@ -49,6 +49,9 @@ pub enum Error {
     #[error("Catalog DSN error: {0}")]
     CatalogDsn(#[from] clap_blocks::catalog_dsn::Error),
+
+    #[error("failed to initialize sharded cache: {0}")]
+    Sharder(#[from] sharder::Error),
 }

 pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -175,7 +178,7 @@ pub async fn create_router_server_type(
     let ns_cache = Arc::new(InstrumentedCache::new(
         Arc::new(ShardedCache::new(
             std::iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
-        )),
+        )?),
         &*metrics,
     ));
@@ -319,13 +322,12 @@ async fn init_write_buffer(
         "connected to write buffer topic",
     );

-    Ok(ShardedWriteBuffer::new(
+    Ok(ShardedWriteBuffer::new(JumpHash::new(
         shards
             .into_iter()
             .map(|id| Sequencer::new(id as _, Arc::clone(&write_buffer), &metrics))
-            .map(Arc::new)
-            .collect::<JumpHash<_>>(),
-    ))
+            .map(Arc::new),
+    )?))
 }

 /// Pre-populate `cache` with the all existing schemas in `catalog`.
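The `Sharder(#[from] sharder::Error)` variant above is what makes the new `?` operators in this file compile: `#[from]` derives a `From<sharder::Error>` impl for the server's `Error`. A minimal sketch of the pattern (thiserror derive as in this file; `build_sharder` is a hypothetical helper, not part of the commit):

    use std::sync::Arc;
    use thiserror::Error;

    #[derive(Debug, Error)]
    pub enum Error {
        #[error("failed to initialize sharded cache: {0}")]
        Sharder(#[from] sharder::Error),
    }

    // `?` converts sharder::Error into Error via the derived From impl.
    fn build_sharder() -> Result<sharder::JumpHash<Arc<u32>>, Error> {
        Ok(sharder::JumpHash::new((0..4_u32).map(Arc::new))?)
    }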

View File

@@ -30,6 +30,9 @@ pub enum Error {
     Catalog {
         source: iox_catalog::interface::Error,
     },
+
+    #[snafu(display("Sharder error: {source}"))]
+    Sharder { source: sharder::Error },
 }

 /// Database for the querier.
@@ -202,16 +205,15 @@ pub async fn create_sharder(catalog: &dyn Catalog) -> Result<JumpHash<Arc<KafkaPartition>>> {
         .map(|sequencer| sequencer.kafka_partition)
         .collect();

-    Ok(shards.into_iter().map(Arc::new).collect())
+    JumpHash::new(shards.into_iter().map(Arc::new)).context(SharderSnafu)
 }

 #[cfg(test)]
 mod tests {
-    use iox_tests::util::TestCatalog;
-
-    use crate::create_ingester_connection_for_testing;
-
     use super::*;
+    use crate::create_ingester_connection_for_testing;
+    use iox_tests::util::TestCatalog;
+    use test_helpers::assert_error;

     #[tokio::test]
     #[should_panic(
@@ -239,7 +241,6 @@ mod tests {
     }

     #[tokio::test]
-    #[should_panic(expected = "cannot initialise sharder with no shards")]
     async fn sequencers_in_catalog_are_required_for_startup() {
         let catalog = TestCatalog::new();
@@ -250,6 +251,7 @@ mod tests {
             usize::MAX,
         ));

+        assert_error!(
             QuerierDatabase::new(
                 catalog_cache,
                 catalog.metric_registry(),
@@ -258,14 +260,17 @@ mod tests {
                 create_ingester_connection_for_testing(),
                 QuerierDatabase::MAX_CONCURRENT_QUERIES_MAX,
             )
-            .await
-            .unwrap();
+            .await,
+            Error::Sharder {
+                source: sharder::Error::NoShards
+            },
+        );
     }

     #[tokio::test]
     async fn test_namespace() {
         let catalog = TestCatalog::new();
-        // QuerierDatabase::new panics if there are no sequencers in the catalog
+        // QuerierDatabase::new returns an error if there are no sequencers in the catalog
         catalog.create_sequencer(0).await;

         let catalog_cache = Arc::new(CatalogCache::new(
@@ -294,7 +299,7 @@ mod tests {
     #[tokio::test]
     async fn test_namespaces() {
         let catalog = TestCatalog::new();
-        // QuerierDatabase::new panics if there are no sequencers in the catalog
+        // QuerierDatabase::new returns an error if there are no sequencers in the catalog
         catalog.create_sequencer(0).await;

         let catalog_cache = Arc::new(CatalogCache::new(
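Here `SharderSnafu` is the context selector that `#[derive(Snafu)]` generates for the `Sharder` variant, and `ResultExt::context` wraps the inner `sharder::Error` on the failure path. A standalone sketch of the same shape used in `create_sharder` above (`make_sharder` is hypothetical):

    use snafu::{ResultExt, Snafu};
    use std::sync::Arc;

    #[derive(Debug, Snafu)]
    pub enum Error {
        #[snafu(display("Sharder error: {source}"))]
        Sharder { source: sharder::Error },
    }

    // An empty `shards` yields Err(Error::Sharder { source: NoShards }).
    fn make_sharder(shards: Vec<i32>) -> Result<sharder::JumpHash<Arc<i32>>, Error> {
        sharder::JumpHash::new(shards.into_iter().map(Arc::new)).context(SharderSnafu)
    }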

View File

@@ -172,7 +172,7 @@ mod tests {
         Arc::clone(&metric_registry),
         usize::MAX,
     ));

-    // QuerierDatabase::new panics if there are no sequencers in the catalog
+    // QuerierDatabase::new returns an error if there are no sequencers in the catalog
     {
         let mut repos = catalog.repositories().await;

View File

@@ -35,11 +35,13 @@ fn init_write_buffer(n_sequencers: u32) -> ShardedWriteBuffer<JumpHash<Arc<Sequencer>>> {
     let shards: BTreeSet<_> = write_buffer.sequencer_ids();

     ShardedWriteBuffer::new(
+        JumpHash::new(
             shards
                 .into_iter()
                 .map(|id| Sequencer::new(id as _, Arc::clone(&write_buffer), &Default::default()))
-                .map(Arc::new)
-                .collect::<JumpHash<_>>(),
+                .map(Arc::new),
+        )
+        .expect("failed to init sharder"),
     )
 }
@@ -57,9 +59,12 @@ fn e2e_benchmarks(c: &mut Criterion) {
     let delegate = {
         let metrics = Arc::new(metric::Registry::new());
         let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));

-        let ns_cache = Arc::new(ShardedCache::new(
-            iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
-        ));
+        let ns_cache = Arc::new(
+            ShardedCache::new(
+                iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
+            )
+            .unwrap(),
+        );

         let write_buffer = init_write_buffer(1);
         let schema_validator =

View File

@@ -42,9 +42,10 @@ fn bench(group: &mut BenchmarkGroup<WallTime>, tables: usize, columns_per_table: usize) {
     let metrics = Arc::new(metric::Registry::default());
     let catalog = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
-    let ns_cache = Arc::new(ShardedCache::new(
-        iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
-    ));
+    let ns_cache = Arc::new(
+        ShardedCache::new(iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10))
+            .unwrap(),
+    );

     let validator = SchemaValidator::new(catalog, ns_cache, &*metrics);

     for i in 0..65_000 {

View File

@@ -12,10 +12,10 @@ pub struct ShardedCache<T> {
 impl<T> ShardedCache<T> {
     /// initialise a [`ShardedCache`] splitting the keyspace over the given
     /// instances of `T`.
-    pub fn new(shards: impl IntoIterator<Item = T>) -> Self {
-        Self {
-            shards: JumpHash::new(shards),
-        }
+    pub fn new(shards: impl IntoIterator<Item = T>) -> Result<Self, sharder::Error> {
+        Ok(Self {
+            shards: JumpHash::new(shards)?,
+        })
     }
 }
@@ -71,9 +71,12 @@ mod tests {
     // The number of shards to hash into.
     const SHARDS: usize = 10;

-    let cache = Arc::new(ShardedCache::new(
-        iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(SHARDS),
-    ));
+    let cache = Arc::new(
+        ShardedCache::new(
+            iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(SHARDS),
+        )
+        .unwrap(),
+    );

     // Build a set of namespace -> unique integer to validate the shard
     // mapping later.
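With the constructor now fallible, an empty shard set surfaces as an `Err` instead of a panic. A quick sketch of the failure case, using the same types as the test above:

    let no_shards: Vec<Arc<MemoryNamespaceCache>> = vec![];
    assert!(ShardedCache::new(no_shards).is_err());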

View File

@@ -83,17 +83,22 @@ impl TestContext {
         let shards: BTreeSet<_> = write_buffer.sequencer_ids();
         let sharded_write_buffer = ShardedWriteBuffer::new(
+            JumpHash::new(
                 shards
                     .into_iter()
                     .map(|id| Sequencer::new(id as _, Arc::clone(&write_buffer), &metrics))
-                    .map(Arc::new)
-                    .collect::<JumpHash<_>>(),
+                    .map(Arc::new),
+            )
+            .unwrap(),
         );

         let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
-        let ns_cache = Arc::new(ShardedCache::new(
-            iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
-        ));
+        let ns_cache = Arc::new(
+            ShardedCache::new(
+                iter::repeat_with(|| Arc::new(MemoryNamespaceCache::default())).take(10),
+            )
+            .unwrap(),
+        );

         let ns_creator = NamespaceAutocreation::new(
             Arc::clone(&catalog),

View File

@@ -8,6 +8,7 @@ data_types = { path = "../data_types" }
 mutable_batch = { path = "../mutable_batch" }
 parking_lot = "0.12"
 siphasher = "0.3"
+snafu = "0.7"
 workspace-hack = { path = "../workspace-hack"}

 [dev-dependencies]
@@ -15,6 +16,7 @@ criterion = { version = "0.3.4", features = ["async_tokio", "html_reports"] }
 hashbrown = "0.12"
 mutable_batch_lp = { path = "../mutable_batch_lp" }
 rand = "0.8.3"
+test_helpers = { path = "../test_helpers" }

 [[bench]]
 name = "sharder"

View File

@@ -86,7 +86,7 @@ fn benchmark_sharder(
     table: &str,
     namespace: &DatabaseName<'_>,
 ) {
-    let hasher = JumpHash::new((0..num_buckets).map(Arc::new));
+    let hasher = JumpHash::new((0..num_buckets).map(Arc::new)).unwrap();
     let batch = MutableBatch::default();

     group.throughput(Throughput::Elements(1));

View File

@@ -2,12 +2,20 @@ use super::Sharder;
 use data_types::{DatabaseName, DeletePredicate};
 use mutable_batch::MutableBatch;
 use siphasher::sip::SipHasher13;
+use snafu::{ensure, Snafu};
 use std::{
     fmt::Debug,
     hash::{Hash, Hasher},
     sync::Arc,
 };

+#[derive(Snafu, Debug)]
+#[allow(missing_docs)]
+pub enum Error {
+    #[snafu(display("Cannot initialize sharder with no shards"))]
+    NoShards,
+}
+
 /// A [`JumpHash`] maps operations for a given table in a given namespace
 /// consistently to the same shard, irrespective of the operation itself with
 /// near perfect distribution.
@@ -39,7 +47,7 @@ impl<T> JumpHash<T> {
     /// # Panics
     ///
     /// This constructor panics if the number of elements in `shards` is 0.
-    pub fn new(shards: impl IntoIterator<Item = T>) -> Self {
+    pub fn new(shards: impl IntoIterator<Item = T>) -> Result<Self, Error> {
         // A randomly generated static siphash key to ensure all router
         // instances hash the same input to the same u64 sharding key.
         //
@@ -50,15 +58,12 @@
         ];

         let shards = shards.into_iter().collect::<Vec<_>>();
-        assert!(
-            !shards.is_empty(),
-            "cannot initialise sharder with no shards"
-        );
+        ensure!(!shards.is_empty(), NoShardsSnafu,);

-        Self {
+        Ok(Self {
             hasher: SipHasher13::new_with_key(&key),
             shards,
-        }
+        })
     }

     /// Return a slice of all the shards this instance is configured with,
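`ensure!` is snafu's guard macro: when the condition is false it early-returns the error built from the context selector, roughly equivalent to this hand-written sketch:

    // Approximate expansion of ensure!(!shards.is_empty(), NoShardsSnafu):
    if shards.is_empty() {
        return NoShardsSnafu.fail(); // Err(Error::NoShards)
    }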
@@ -102,12 +107,6 @@
         }
     }

-impl<T> FromIterator<T> for JumpHash<T> {
-    fn from_iter<U: IntoIterator<Item = T>>(iter: U) -> Self {
-        Self::new(iter)
-    }
-}
-
 #[derive(Hash)]
 struct HashKey<'a> {
     table: &'a str,
@@ -169,17 +168,18 @@ where
 #[cfg(test)]
 mod tests {
+    use super::*;
     use data_types::TimestampRange;
     use hashbrown::HashMap;
-
-    use super::*;
+    use std::iter;
+    use test_helpers::assert_error;

     #[test]
     fn test_consistent_hashing() {
         const NUM_TESTS: usize = 10_000;
         const NUM_SHARDS: usize = 10;

-        let hasher = JumpHash::new(0..NUM_SHARDS);
+        let hasher = JumpHash::new(0..NUM_SHARDS).unwrap();

         // Create a HashMap<key, shard> to verify against.
         let mappings = (0..NUM_TESTS)
@@ -198,7 +198,7 @@
             .all(|(&key, &value)| hasher.hash(key) == value));

         // Reinitialise the hasher with the same (default) key
-        let hasher = JumpHash::new(0..NUM_SHARDS);
+        let hasher = JumpHash::new(0..NUM_SHARDS).unwrap();

         // And assert the mappings are the same
         assert!(mappings
@@ -206,7 +206,9 @@
             .all(|(&key, &value)| hasher.hash(key) == value));

         // Reinitialise the hasher with the a different key
-        let hasher = JumpHash::new(0..NUM_SHARDS).with_seed_key(&[42; 16]);
+        let hasher = JumpHash::new(0..NUM_SHARDS)
+            .unwrap()
+            .with_seed_key(&[42; 16]);

         // And assert the mappings are the NOT all same (some may be the same)
         assert!(!mappings
@@ -216,7 +218,7 @@
     #[test]
     fn test_sharder_impl() {
-        let hasher = JumpHash::new((0..10_000).map(Arc::new));
+        let hasher = JumpHash::new((0..10_000).map(Arc::new)).unwrap();

         let a = hasher.shard(
             "table",
@@ -261,7 +263,7 @@
     #[test]
     fn test_sharder_prefix_collision() {
-        let hasher = JumpHash::new((0..10_000).map(Arc::new));
+        let hasher = JumpHash::new((0..10_000).map(Arc::new)).unwrap();

         let a = hasher.shard(
             "a",
             &DatabaseName::try_from("bc").unwrap(),
@@ -289,7 +291,7 @@
     // strategy would that accounts for this mapping change.
     #[test]
     fn test_key_bucket_fixture() {
-        let hasher = JumpHash::new((0..1_000).map(Arc::new));
+        let hasher = JumpHash::new((0..1_000).map(Arc::new)).unwrap();

         let namespace = DatabaseName::try_from("bananas").unwrap();
         let mut batches = mutable_batch_lp::lines_to_batches("cpu a=1i", 42).unwrap();
@@ -308,7 +310,7 @@
     #[test]
     fn test_distribution() {
-        let hasher = JumpHash::new((0..100).map(Arc::new));
+        let hasher = JumpHash::new((0..100).map(Arc::new)).unwrap();

         let namespace = DatabaseName::try_from("bananas").unwrap();
         let mut mapping = HashMap::<_, usize>::new();
@@ -336,7 +338,7 @@
     fn test_delete_with_table() {
         let namespace = DatabaseName::try_from("bananas").unwrap();

-        let hasher = JumpHash::new((0..10_000).map(Arc::new));
+        let hasher = JumpHash::new((0..10_000).map(Arc::new)).unwrap();

         let predicate = DeletePredicate {
             range: TimestampRange::new(1, 2),
@@ -362,7 +364,7 @@
         let namespace = DatabaseName::try_from("bananas").unwrap();

         let shards = (0..10_000).map(Arc::new).collect::<Vec<_>>();
-        let hasher = JumpHash::new(shards.clone());
+        let hasher = JumpHash::new(shards.clone()).unwrap();

         let predicate = DeletePredicate {
             range: TimestampRange::new(1, 2),
@@ -373,4 +375,10 @@
         assert_eq!(got, shards);
     }
+
+    #[test]
+    fn no_shards() {
+        let shards: iter::Empty<i32> = iter::empty();
+        assert_error!(JumpHash::new(shards), Error::NoShards,);
+    }
 }
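The new `no_shards` test leans on `assert_error!` from `test_helpers`; without that helper, an equivalent check could be written as this sketch:

    #[test]
    fn no_shards_without_helper() {
        let shards: std::iter::Empty<i32> = std::iter::empty();
        assert!(matches!(JumpHash::new(shards), Err(Error::NoShards)));
    }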