Merge branch 'main' into dom/cached-fsm-schema
commit
a37b85804d
|
@ -981,6 +981,7 @@ dependencies = [
|
|||
"metric",
|
||||
"object_store",
|
||||
"observability_deps",
|
||||
"parking_lot",
|
||||
"parquet_file",
|
||||
"rand",
|
||||
"schema",
|
||||
|
@ -4184,9 +4185,9 @@ checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
|||
|
||||
[[package]]
|
||||
name = "pprof"
|
||||
version = "0.12.0"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b90f8560ad8bd57b207b8293bc5226e48e89039a6e590c12a297d91b84c7e60"
|
||||
checksum = "978385d59daf9269189d052ca8a84c1acfd0715c0599a5d5188d4acc078ca46a"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"cfg-if",
|
||||
|
@ -4905,18 +4906,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.175"
|
||||
version = "1.0.176"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b"
|
||||
checksum = "76dc28c9523c5d70816e393136b86d48909cfb27cecaa902d338c19ed47164dc"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.175"
|
||||
version = "1.0.176"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4"
|
||||
checksum = "a4e7b8c5dc823e3b90651ff1d3808419cd14e5ad76de04feaf37da114e7a306f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -4925,9 +4926,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.103"
|
||||
version = "1.0.104"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
|
||||
checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -5690,9 +5691,9 @@ checksum = "d3543ca0810e71767052bdcdd5653f23998b192642a22c5164bfa6581e40a4a2"
|
|||
|
||||
[[package]]
|
||||
name = "sysinfo"
|
||||
version = "0.29.6"
|
||||
version = "0.29.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7cb97a5a85a136d84e75d5c3cf89655090602efb1be0d8d5337b7e386af2908"
|
||||
checksum = "165d6d8539689e3d3bc8b98ac59541e1f21c7de7c85d60dc80e43ae0ed2113db"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"core-foundation-sys",
|
||||
|
|
|
@ -125,6 +125,18 @@ pub struct RouterConfig {
|
|||
default_value = "1"
|
||||
)]
|
||||
pub rpc_write_replicas: NonZeroUsize,
|
||||
|
||||
/// Specify the (discrete) slices of time in which the router's write
|
||||
/// request failures must exceed the write client's maximum error ratio of
|
||||
/// 80% for a downstream RPC write handler to be considered in the unhealthy
|
||||
/// state.
|
||||
#[clap(
|
||||
long = "rpc-write-health-error-window-seconds",
|
||||
env = "INFLUXDB_IOX_RPC_WRITE_HEALTH_ERROR_WINDOW_SECONDS",
|
||||
default_value = "5",
|
||||
value_parser = parse_duration
|
||||
)]
|
||||
pub rpc_write_health_error_window_seconds: Duration,
|
||||
}
|
||||
|
||||
/// Map a string containing an integer number of seconds into a [`Duration`].
|
||||
|
|
|
@ -30,6 +30,7 @@ trace = { version = "0.1.0", path = "../trace" }
|
|||
tracker = { path = "../tracker" }
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
parking_lot = "0.12.1"
|
||||
|
||||
[dev-dependencies]
|
||||
arrow_util = { path = "../arrow_util" }
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use compactor_scheduler::{create_scheduler, PartitionsSource, Scheduler};
|
||||
use compactor_scheduler::{create_scheduler, Scheduler};
|
||||
use data_types::CompactionLevel;
|
||||
use object_store::memory::InMemory;
|
||||
|
||||
|
@ -64,7 +64,7 @@ use super::{
|
|||
logging::LoggingPartitionsSourceWrapper, metrics::MetricsPartitionsSourceWrapper,
|
||||
not_empty::NotEmptyPartitionsSourceWrapper,
|
||||
randomize_order::RandomizeOrderPartitionsSourcesWrapper,
|
||||
scheduled::ScheduledPartitionsSource,
|
||||
scheduled::ScheduledPartitionsSource, PartitionsSource,
|
||||
},
|
||||
post_classification_partition_filter::{
|
||||
logging::LoggingPostClassificationFilterWrapper,
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::{collections::VecDeque, fmt::Display, sync::Arc};
|
||||
|
||||
use compactor_scheduler::PartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use futures::{stream::BoxStream, StreamExt};
|
||||
|
||||
use super::super::partition_files_source::rate_limit::RateLimit;
|
||||
use super::super::{
|
||||
partition_files_source::rate_limit::RateLimit, partitions_source::PartitionsSource,
|
||||
};
|
||||
use super::PartitionStream;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -41,7 +42,7 @@ impl<T> PartitionStream for EndlessPartititionStream<T>
|
|||
where
|
||||
T: PartitionsSource,
|
||||
{
|
||||
fn stream(&self) -> BoxStream<'_, PartitionId> {
|
||||
fn stream(&self) -> BoxStream<'_, CompactionJob> {
|
||||
let source = Arc::clone(&self.source);
|
||||
|
||||
// Note: we use a VecDeque as a buffer so we can preserve the order and cheaply remove the first element without
|
||||
|
@ -79,9 +80,9 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor_scheduler::MockPartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
|
||||
use super::*;
|
||||
use super::{super::super::partitions_source::mock::MockPartitionsSource, *};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -92,9 +93,9 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_stream() {
|
||||
let ids = vec![
|
||||
PartitionId::new(1),
|
||||
PartitionId::new(3),
|
||||
PartitionId::new(2),
|
||||
CompactionJob::new(PartitionId::new(1)),
|
||||
CompactionJob::new(PartitionId::new(3)),
|
||||
CompactionJob::new(PartitionId::new(2)),
|
||||
];
|
||||
let stream = EndlessPartititionStream::new(MockPartitionsSource::new(ids.clone()));
|
||||
|
||||
|
@ -103,13 +104,7 @@ mod tests {
|
|||
// we need to limit the stream at one point because it is endless
|
||||
assert_eq!(
|
||||
stream.stream().take(5).collect::<Vec<_>>().await,
|
||||
vec![
|
||||
PartitionId::new(1),
|
||||
PartitionId::new(3),
|
||||
PartitionId::new(2),
|
||||
PartitionId::new(1),
|
||||
PartitionId::new(3)
|
||||
],
|
||||
[&ids[..], &ids[..2]].concat(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::fmt::{Debug, Display};
|
||||
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use futures::stream::BoxStream;
|
||||
|
||||
pub mod endless;
|
||||
|
@ -8,8 +8,8 @@ pub mod once;
|
|||
|
||||
/// Source for partitions.
|
||||
pub trait PartitionStream: Debug + Display + Send + Sync {
|
||||
/// Create new source stream of partitions.
|
||||
/// Create new source stream of compaction job.
|
||||
///
|
||||
/// This stream may be endless.
|
||||
fn stream(&self) -> BoxStream<'_, PartitionId>;
|
||||
fn stream(&self) -> BoxStream<'_, CompactionJob>;
|
||||
}
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
use std::{fmt::Display, sync::Arc};
|
||||
|
||||
use compactor_scheduler::PartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use futures::{stream::BoxStream, StreamExt};
|
||||
|
||||
use super::PartitionStream;
|
||||
use super::{super::partitions_source::PartitionsSource, PartitionStream};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct OncePartititionStream<T>
|
||||
|
@ -38,7 +37,7 @@ impl<T> PartitionStream for OncePartititionStream<T>
|
|||
where
|
||||
T: PartitionsSource,
|
||||
{
|
||||
fn stream(&self) -> BoxStream<'_, PartitionId> {
|
||||
fn stream(&self) -> BoxStream<'_, CompactionJob> {
|
||||
let source = Arc::clone(&self.source);
|
||||
futures::stream::once(async move { futures::stream::iter(source.fetch().await) })
|
||||
.flatten()
|
||||
|
@ -48,9 +47,9 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor_scheduler::MockPartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
|
||||
use super::*;
|
||||
use super::{super::super::partitions_source::mock::MockPartitionsSource, *};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -61,9 +60,9 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_stream() {
|
||||
let ids = vec![
|
||||
PartitionId::new(1),
|
||||
PartitionId::new(3),
|
||||
PartitionId::new(2),
|
||||
CompactionJob::new(PartitionId::new(1)),
|
||||
CompactionJob::new(PartitionId::new(3)),
|
||||
CompactionJob::new(PartitionId::new(2)),
|
||||
];
|
||||
let stream = OncePartititionStream::new(MockPartitionsSource::new(ids.clone()));
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use compactor_scheduler::PartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use observability_deps::tracing::{info, warn};
|
||||
|
||||
use super::PartitionsSource;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LoggingPartitionsSourceWrapper<T>
|
||||
where
|
||||
|
@ -36,7 +37,7 @@ impl<T> PartitionsSource for LoggingPartitionsSourceWrapper<T>
|
|||
where
|
||||
T: PartitionsSource,
|
||||
{
|
||||
async fn fetch(&self) -> Vec<PartitionId> {
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
let partitions = self.inner.fetch().await;
|
||||
info!(n_partitions = partitions.len(), "Fetch partitions",);
|
||||
if partitions.is_empty() {
|
||||
|
@ -48,10 +49,10 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor_scheduler::MockPartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use test_helpers::tracing::TracingCapture;
|
||||
|
||||
use super::*;
|
||||
use super::{super::mock::MockPartitionsSource, *};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -74,9 +75,9 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_fetch_some() {
|
||||
let p_1 = PartitionId::new(5);
|
||||
let p_2 = PartitionId::new(1);
|
||||
let p_3 = PartitionId::new(12);
|
||||
let p_1 = CompactionJob::new(PartitionId::new(5));
|
||||
let p_2 = CompactionJob::new(PartitionId::new(1));
|
||||
let p_3 = CompactionJob::new(PartitionId::new(12));
|
||||
let partitions = vec![p_1, p_2, p_3];
|
||||
|
||||
let source =
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use compactor_scheduler::PartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use metric::{Registry, U64Counter};
|
||||
|
||||
use super::PartitionsSource;
|
||||
|
||||
const METRIC_NAME_PARTITIONS_FETCH_COUNT: &str = "iox_compactor_partitions_fetch_count";
|
||||
const METRIC_NAME_PARTITIONS_COUNT: &str = "iox_compactor_partitions_count";
|
||||
|
||||
|
@ -58,7 +59,7 @@ impl<T> PartitionsSource for MetricsPartitionsSourceWrapper<T>
|
|||
where
|
||||
T: PartitionsSource,
|
||||
{
|
||||
async fn fetch(&self) -> Vec<PartitionId> {
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
let partitions = self.inner.fetch().await;
|
||||
self.partitions_fetch_counter.inc(1);
|
||||
self.partitions_counter.inc(partitions.len() as u64);
|
||||
|
@ -68,10 +69,10 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor_scheduler::MockPartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use metric::assert_counter;
|
||||
|
||||
use super::*;
|
||||
use super::{super::mock::MockPartitionsSource, *};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -85,9 +86,9 @@ mod tests {
|
|||
async fn test_fetch() {
|
||||
let registry = Registry::new();
|
||||
let partitions = vec![
|
||||
PartitionId::new(5),
|
||||
PartitionId::new(1),
|
||||
PartitionId::new(12),
|
||||
CompactionJob::new(PartitionId::new(5)),
|
||||
CompactionJob::new(PartitionId::new(1)),
|
||||
CompactionJob::new(PartitionId::new(12)),
|
||||
];
|
||||
let source = MetricsPartitionsSourceWrapper::new(
|
||||
MockPartitionsSource::new(partitions.clone()),
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
use async_trait::async_trait;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use super::PartitionsSource;
|
||||
|
||||
/// A mock structure for providing [partitions](CompactionJob).
|
||||
#[derive(Debug)]
|
||||
pub struct MockPartitionsSource {
|
||||
partitions: Mutex<Vec<CompactionJob>>,
|
||||
}
|
||||
|
||||
impl MockPartitionsSource {
|
||||
#[allow(dead_code)]
|
||||
/// Create a new MockPartitionsSource.
|
||||
pub fn new(partitions: Vec<CompactionJob>) -> Self {
|
||||
Self {
|
||||
partitions: Mutex::new(partitions),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set CompactionJobs for MockPartitionsSource.
|
||||
#[allow(dead_code)] // not used anywhere
|
||||
pub fn set(&self, partitions: Vec<CompactionJob>) {
|
||||
*self.partitions.lock() = partitions;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for MockPartitionsSource {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "mock")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PartitionsSource for MockPartitionsSource {
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
self.partitions.lock().clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use data_types::PartitionId;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
assert_eq!(MockPartitionsSource::new(vec![]).to_string(), "mock",);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_fetch() {
|
||||
let source = MockPartitionsSource::new(vec![]);
|
||||
assert_eq!(source.fetch().await, vec![],);
|
||||
|
||||
let p_1 = CompactionJob::new(PartitionId::new(5));
|
||||
let p_2 = CompactionJob::new(PartitionId::new(1));
|
||||
let p_3 = CompactionJob::new(PartitionId::new(12));
|
||||
let parts = vec![p_1, p_2, p_3];
|
||||
source.set(parts.clone());
|
||||
assert_eq!(source.fetch().await, parts,);
|
||||
}
|
||||
}
|
|
@ -1,8 +1,38 @@
|
|||
//! Abstractions that provide functionality over a [`PartitionsSource`](compactor_scheduler::PartitionsSource) of PartitionIds.
|
||||
//! Abstractions that provide functionality over a [`PartitionsSource`] of PartitionIds.
|
||||
//!
|
||||
//! These abstractions are for actions taken in a compactor using the PartitionIds received from a compactor_scheduler.
|
||||
//! These abstractions are for actions taken in a compactor using the CompactionJobs received from a compactor_scheduler.
|
||||
pub mod logging;
|
||||
pub mod metrics;
|
||||
pub mod mock;
|
||||
pub mod not_empty;
|
||||
pub mod randomize_order;
|
||||
pub mod scheduled;
|
||||
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
|
||||
/// A source of partitions, noted by [`CompactionJob`](compactor_scheduler::CompactionJob), that may potentially need compacting.
|
||||
#[async_trait]
|
||||
pub trait PartitionsSource: Debug + Display + Send + Sync {
|
||||
/// Get partition IDs.
|
||||
///
|
||||
/// This method performs retries.
|
||||
///
|
||||
/// This should only perform basic, efficient filtering. It MUST NOT inspect individual parquet files.
|
||||
async fn fetch(&self) -> Vec<CompactionJob>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T> PartitionsSource for Arc<T>
|
||||
where
|
||||
T: PartitionsSource + ?Sized,
|
||||
{
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
self.as_ref().fetch().await
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::{fmt::Display, sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use compactor_scheduler::PartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use iox_time::TimeProvider;
|
||||
|
||||
use super::PartitionsSource;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NotEmptyPartitionsSourceWrapper<T>
|
||||
where
|
||||
|
@ -42,7 +43,7 @@ impl<T> PartitionsSource for NotEmptyPartitionsSourceWrapper<T>
|
|||
where
|
||||
T: PartitionsSource,
|
||||
{
|
||||
async fn fetch(&self) -> Vec<PartitionId> {
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
loop {
|
||||
let res = self.inner.fetch().await;
|
||||
if !res.is_empty() {
|
||||
|
@ -55,11 +56,11 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor_scheduler::MockPartitionsSource;
|
||||
use compactor_test_utils::AssertFutureExt;
|
||||
use data_types::PartitionId;
|
||||
use iox_time::{MockProvider, Time};
|
||||
|
||||
use super::*;
|
||||
use super::{super::mock::MockPartitionsSource, *};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -90,7 +91,7 @@ mod tests {
|
|||
fut.assert_pending().await;
|
||||
|
||||
// insert data but system is still throttled
|
||||
let p = PartitionId::new(5);
|
||||
let p = CompactionJob::new(PartitionId::new(5));
|
||||
let parts = vec![p];
|
||||
inner.set(parts.clone());
|
||||
fut.assert_pending().await;
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use compactor_scheduler::PartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
|
||||
|
||||
use super::PartitionsSource;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RandomizeOrderPartitionsSourcesWrapper<T>
|
||||
where
|
||||
|
@ -37,7 +38,7 @@ impl<T> PartitionsSource for RandomizeOrderPartitionsSourcesWrapper<T>
|
|||
where
|
||||
T: PartitionsSource,
|
||||
{
|
||||
async fn fetch(&self) -> Vec<PartitionId> {
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
let mut partitions = self.inner.fetch().await;
|
||||
let mut rng = StdRng::seed_from_u64(self.seed);
|
||||
partitions.shuffle(&mut rng);
|
||||
|
@ -47,9 +48,9 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use compactor_scheduler::MockPartitionsSource;
|
||||
use data_types::PartitionId;
|
||||
|
||||
use super::*;
|
||||
use super::{super::mock::MockPartitionsSource, *};
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
|
@ -67,21 +68,27 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_fetch_some() {
|
||||
let p_1 = PartitionId::new(5);
|
||||
let p_2 = PartitionId::new(1);
|
||||
let p_3 = PartitionId::new(12);
|
||||
let partitions = vec![p_1, p_2, p_3];
|
||||
let p_1 = CompactionJob::new(PartitionId::new(5));
|
||||
let p_2 = CompactionJob::new(PartitionId::new(1));
|
||||
let p_3 = CompactionJob::new(PartitionId::new(12));
|
||||
let partitions = vec![p_1.clone(), p_2.clone(), p_3.clone()];
|
||||
|
||||
// shuffles
|
||||
let source = RandomizeOrderPartitionsSourcesWrapper::new(
|
||||
MockPartitionsSource::new(partitions.clone()),
|
||||
123,
|
||||
);
|
||||
assert_eq!(source.fetch().await, vec![p_3, p_2, p_1,],);
|
||||
assert_eq!(
|
||||
source.fetch().await,
|
||||
vec![p_3.clone(), p_2.clone(), p_1.clone(),],
|
||||
);
|
||||
|
||||
// is deterministic in same source
|
||||
for _ in 0..100 {
|
||||
assert_eq!(source.fetch().await, vec![p_3, p_2, p_1,],);
|
||||
assert_eq!(
|
||||
source.fetch().await,
|
||||
vec![p_3.clone(), p_2.clone(), p_1.clone(),],
|
||||
);
|
||||
}
|
||||
|
||||
// is deterministic with new source
|
||||
|
@ -90,7 +97,10 @@ mod tests {
|
|||
MockPartitionsSource::new(partitions.clone()),
|
||||
123,
|
||||
);
|
||||
assert_eq!(source.fetch().await, vec![p_3, p_2, p_1,],);
|
||||
assert_eq!(
|
||||
source.fetch().await,
|
||||
vec![p_3.clone(), p_2.clone(), p_1.clone(),],
|
||||
);
|
||||
}
|
||||
|
||||
// different seed => different output
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use compactor_scheduler::{CompactionJob, PartitionsSource, Scheduler};
|
||||
use data_types::PartitionId;
|
||||
use compactor_scheduler::{CompactionJob, Scheduler};
|
||||
|
||||
use super::PartitionsSource;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ScheduledPartitionsSource {
|
||||
|
@ -17,9 +18,8 @@ impl ScheduledPartitionsSource {
|
|||
|
||||
#[async_trait]
|
||||
impl PartitionsSource for ScheduledPartitionsSource {
|
||||
async fn fetch(&self) -> Vec<PartitionId> {
|
||||
let job: Vec<CompactionJob> = self.scheduler.get_jobs().await;
|
||||
job.into_iter().map(|job| job.partition_id).collect()
|
||||
async fn fetch(&self) -> Vec<CompactionJob> {
|
||||
self.scheduler.get_jobs().await
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
|
||||
|
||||
use chrono::Utc;
|
||||
use compactor_scheduler::CompactionJob;
|
||||
use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId};
|
||||
use futures::{stream, StreamExt, TryStreamExt};
|
||||
use iox_query::exec::query_tracing::send_metrics_to_tracing;
|
||||
|
@ -36,7 +37,7 @@ pub async fn compact(
|
|||
components
|
||||
.partition_stream
|
||||
.stream()
|
||||
.map(|partition_id| {
|
||||
.map(|job| {
|
||||
let components = Arc::clone(components);
|
||||
|
||||
// A root span is created for each partition. Later this can be linked to the
|
||||
|
@ -48,7 +49,7 @@ pub async fn compact(
|
|||
|
||||
compact_partition(
|
||||
span,
|
||||
partition_id,
|
||||
job,
|
||||
partition_timeout,
|
||||
Arc::clone(&df_semaphore),
|
||||
components,
|
||||
|
@ -61,11 +62,12 @@ pub async fn compact(
|
|||
|
||||
async fn compact_partition(
|
||||
mut span: SpanRecorder,
|
||||
partition_id: PartitionId,
|
||||
job: CompactionJob,
|
||||
partition_timeout: Duration,
|
||||
df_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
components: Arc<Components>,
|
||||
) {
|
||||
let partition_id = job.partition_id;
|
||||
info!(partition_id = partition_id.get(), timeout = ?partition_timeout, "compact partition",);
|
||||
span.set_metadata("partition_id", partition_id.get().to_string());
|
||||
let scratchpad = components.scratchpad_gen.pad();
|
||||
|
@ -76,7 +78,7 @@ async fn compact_partition(
|
|||
async {
|
||||
try_compact_partition(
|
||||
span,
|
||||
partition_id,
|
||||
job.clone(),
|
||||
df_semaphore,
|
||||
components,
|
||||
scratchpad,
|
||||
|
@ -203,12 +205,13 @@ async fn compact_partition(
|
|||
/// . Round 2 happens or not depends on the stop condition
|
||||
async fn try_compact_partition(
|
||||
span: SpanRecorder,
|
||||
partition_id: PartitionId,
|
||||
job: CompactionJob,
|
||||
df_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
components: Arc<Components>,
|
||||
scratchpad_ctx: Arc<dyn Scratchpad>,
|
||||
transmit_progress_signal: Sender<bool>,
|
||||
) -> Result<(), DynError> {
|
||||
let partition_id = job.partition_id;
|
||||
let mut files = components.partition_files_source.fetch(partition_id).await;
|
||||
let partition_info = components.partition_info_source.fetch(partition_id).await?;
|
||||
let transmit_progress_signal = Arc::new(transmit_progress_signal);
|
||||
|
@ -269,12 +272,13 @@ async fn try_compact_partition(
|
|||
let df_semaphore = Arc::clone(&df_semaphore);
|
||||
let transmit_progress_signal = Arc::clone(&transmit_progress_signal);
|
||||
let scratchpad = Arc::clone(&scratchpad_ctx);
|
||||
let job = job.clone();
|
||||
let branch_span = round_span.child("branch");
|
||||
|
||||
async move {
|
||||
execute_branch(
|
||||
branch_span,
|
||||
partition_id,
|
||||
job,
|
||||
branch,
|
||||
df_semaphore,
|
||||
components,
|
||||
|
@ -298,7 +302,7 @@ async fn try_compact_partition(
|
|||
#[allow(clippy::too_many_arguments)]
|
||||
async fn execute_branch(
|
||||
span: SpanRecorder,
|
||||
partition_id: PartitionId,
|
||||
job: CompactionJob,
|
||||
branch: Vec<ParquetFile>,
|
||||
df_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
components: Arc<Components>,
|
||||
|
@ -418,7 +422,7 @@ async fn execute_branch(
|
|||
// files and update the upgraded files
|
||||
let (created_files, upgraded_files) = update_catalog(
|
||||
Arc::clone(&components),
|
||||
partition_id,
|
||||
job.clone(),
|
||||
&saved_parquet_file_state,
|
||||
files_to_delete,
|
||||
upgrade,
|
||||
|
@ -627,13 +631,14 @@ async fn fetch_and_save_parquet_file_state(
|
|||
/// Return created and upgraded files
|
||||
async fn update_catalog(
|
||||
components: Arc<Components>,
|
||||
partition_id: PartitionId,
|
||||
job: CompactionJob,
|
||||
saved_parquet_file_state: &SavedParquetFileState,
|
||||
files_to_delete: Vec<ParquetFile>,
|
||||
files_to_upgrade: Vec<ParquetFile>,
|
||||
file_params_to_create: Vec<ParquetFileParams>,
|
||||
target_level: CompactionLevel,
|
||||
) -> Result<(Vec<ParquetFile>, Vec<ParquetFile>), DynError> {
|
||||
let partition_id = job.partition_id;
|
||||
let current_parquet_file_state =
|
||||
fetch_and_save_parquet_file_state(&components, partition_id).await;
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ pub(crate) use local_scheduler::{
|
|||
|
||||
// partitions_source trait
|
||||
mod partitions_source;
|
||||
pub use partitions_source::*;
|
||||
pub(crate) use partitions_source::*;
|
||||
|
||||
// scheduler trait and associated types
|
||||
mod scheduler;
|
||||
|
|
|
@ -9,7 +9,7 @@ use parking_lot::Mutex;
|
|||
|
||||
/// A source of partitions, noted by [`PartitionId`](data_types::PartitionId), that may potentially need compacting.
|
||||
#[async_trait]
|
||||
pub trait PartitionsSource: Debug + Display + Send + Sync {
|
||||
pub(crate) trait PartitionsSource: Debug + Display + Send + Sync {
|
||||
/// Get partition IDs.
|
||||
///
|
||||
/// This method performs retries.
|
||||
|
@ -28,19 +28,19 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub use mock::MockPartitionsSource;
|
||||
pub(crate) use mock::MockPartitionsSource;
|
||||
mod mock {
|
||||
use super::*;
|
||||
|
||||
/// A mock structure for providing [partitions](PartitionId).
|
||||
#[derive(Debug)]
|
||||
pub struct MockPartitionsSource {
|
||||
pub(crate) struct MockPartitionsSource {
|
||||
partitions: Mutex<Vec<PartitionId>>,
|
||||
}
|
||||
|
||||
impl MockPartitionsSource {
|
||||
/// Create a new MockPartitionsSource.
|
||||
pub fn new(partitions: Vec<PartitionId>) -> Self {
|
||||
pub(crate) fn new(partitions: Vec<PartitionId>) -> Self {
|
||||
Self {
|
||||
partitions: Mutex::new(partitions),
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ mod mock {
|
|||
|
||||
/// Set PartitionIds for MockPartitionsSource.
|
||||
#[allow(dead_code)] // not used anywhere
|
||||
pub fn set(&self, partitions: Vec<PartitionId>) {
|
||||
pub(crate) fn set(&self, partitions: Vec<PartitionId>) {
|
||||
*self.partitions.lock() = partitions;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ impl std::fmt::Display for SchedulerConfig {
|
|||
}
|
||||
|
||||
/// Job assignment for a given partition.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct CompactionJob {
|
||||
#[allow(dead_code)]
|
||||
/// Unique identifier for this job.
|
||||
|
|
|
@ -16,7 +16,7 @@ parquet_file = { path = "../parquet_file" }
|
|||
object_store = { workspace=true }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
thiserror = "1.0.44"
|
||||
tokio = { version = "1.29" }
|
||||
tokio-util = { version = "0.7.8" }
|
||||
|
|
|
@ -10,7 +10,7 @@ bytes = "1.4"
|
|||
futures = { version = "0.3", default-features = false }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
snafu = "0.7"
|
||||
url = "2.4.0"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
|
|
@ -67,7 +67,7 @@ libc = { version = "0.2" }
|
|||
num_cpus = "1.16.0"
|
||||
once_cell = { version = "1.18", features = ["parking_lot"] }
|
||||
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.7.0"
|
||||
thiserror = "1.0.44"
|
||||
|
@ -93,7 +93,7 @@ predicate = { path = "../predicate" }
|
|||
predicates = "3.0.3"
|
||||
pretty_assertions = "1.4.0"
|
||||
proptest = { version = "1.2.0", default-features = false }
|
||||
serde = "1.0.175"
|
||||
serde = "1.0.176"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
|
||||
insta = { version = "1", features = ["yaml"] }
|
||||
|
|
|
@ -488,6 +488,7 @@ impl Config {
|
|||
rpc_write_timeout_seconds: Duration::new(3, 0),
|
||||
rpc_write_replicas: 1.try_into().unwrap(),
|
||||
rpc_write_max_outgoing_bytes: ingester_config.rpc_write_max_incoming_bytes,
|
||||
rpc_write_health_error_window_seconds: Duration::from_secs(5),
|
||||
};
|
||||
|
||||
// create a CompactorConfig for the all in one server based on
|
||||
|
|
|
@ -24,7 +24,7 @@ prost = "0.11"
|
|||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread"] }
|
||||
tokio-stream = "0.1.13"
|
||||
thiserror = "1.0.44"
|
||||
|
|
|
@ -40,6 +40,18 @@ impl WriteOperation {
|
|||
}
|
||||
}
|
||||
|
||||
/// Do NOT remove this test annotation. This constructor exists to by-pass
|
||||
/// safety invariant assertions for testing code only.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn new_empty_invalid(namespace: NamespaceId, partition_key: PartitionKey) -> Self {
|
||||
Self {
|
||||
namespace,
|
||||
tables: Default::default(),
|
||||
partition_key,
|
||||
span_context: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The namespace which the write is
|
||||
pub fn namespace(&self) -> NamespaceId {
|
||||
self.namespace
|
||||
|
|
|
@ -75,12 +75,12 @@ where
|
|||
"Number of WAL files that have started to be replayed",
|
||||
)
|
||||
.recorder(&[]);
|
||||
let op_count_metric = metrics
|
||||
.register_metric::<U64Counter>(
|
||||
"ingester_wal_replay_ops",
|
||||
"Number of operations successfully replayed from the WAL",
|
||||
)
|
||||
.recorder(&[]);
|
||||
let op_count_metric = metrics.register_metric::<U64Counter>(
|
||||
"ingester_wal_replay_ops",
|
||||
"Number of operations replayed from the WAL",
|
||||
);
|
||||
let ok_op_count_metric = op_count_metric.recorder(&[("outcome", "success")]);
|
||||
let empty_op_count_metric = op_count_metric.recorder(&[("outcome", "skipped_empty")]);
|
||||
|
||||
let n_files = files.len();
|
||||
info!(n_files, "found wal files for replay");
|
||||
|
@ -112,7 +112,7 @@ where
|
|||
);
|
||||
|
||||
// Replay this segment file
|
||||
match replay_file(reader, sink, &op_count_metric).await? {
|
||||
match replay_file(reader, sink, &ok_op_count_metric, &empty_op_count_metric).await? {
|
||||
v @ Some(_) => max_sequence = max_sequence.max(v),
|
||||
None => {
|
||||
// This file was empty and should be deleted.
|
||||
|
@ -181,7 +181,8 @@ where
|
|||
async fn replay_file<T>(
|
||||
file: wal::ClosedSegmentFileReader,
|
||||
sink: &T,
|
||||
op_count_metric: &U64Counter,
|
||||
ok_op_count_metric: &U64Counter,
|
||||
empty_op_count_metric: &U64Counter,
|
||||
) -> Result<Option<SequenceNumber>, WalReplayError>
|
||||
where
|
||||
T: DmlSink,
|
||||
|
@ -212,6 +213,12 @@ where
|
|||
let namespace_id = NamespaceId::new(op.database_id);
|
||||
let partition_key = PartitionKey::from(op.partition_key);
|
||||
|
||||
if batches.is_empty() {
|
||||
warn!(%namespace_id, "encountered wal op containing no table data, skipping replay");
|
||||
empty_op_count_metric.inc(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
let op = WriteOperation::new(
|
||||
namespace_id,
|
||||
batches
|
||||
|
@ -255,7 +262,7 @@ where
|
|||
.await
|
||||
.map_err(Into::<DmlError>::into)?;
|
||||
|
||||
op_count_metric.inc(1);
|
||||
ok_op_count_metric.inc(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -368,12 +375,22 @@ mod tests {
|
|||
),
|
||||
);
|
||||
|
||||
// Emulate a mid-write crash by inserting an op with no data
|
||||
let empty_op = WriteOperation::new_empty_invalid(
|
||||
ARBITRARY_NAMESPACE_ID,
|
||||
ARBITRARY_PARTITION_KEY.clone(),
|
||||
);
|
||||
|
||||
// The write portion of this test.
|
||||
//
|
||||
// Write two ops, rotate the file, and write a third op.
|
||||
{
|
||||
let inner =
|
||||
Arc::new(MockDmlSink::default().with_apply_return(vec![Ok(()), Ok(()), Ok(())]));
|
||||
let inner = Arc::new(MockDmlSink::default().with_apply_return(vec![
|
||||
Ok(()),
|
||||
Ok(()),
|
||||
Ok(()),
|
||||
Ok(()),
|
||||
]));
|
||||
let wal = Wal::new(dir.path())
|
||||
.await
|
||||
.expect("failed to initialise WAL");
|
||||
|
@ -405,8 +422,14 @@ mod tests {
|
|||
.await
|
||||
.expect("wal should not error");
|
||||
|
||||
// Write the empty op
|
||||
wal_sink
|
||||
.apply(IngestOp::Write(empty_op))
|
||||
.await
|
||||
.expect("wal should not error");
|
||||
|
||||
// Assert the mock inner sink saw the calls
|
||||
assert_eq!(inner.get_calls().len(), 3);
|
||||
assert_eq!(inner.get_calls().len(), 4);
|
||||
}
|
||||
|
||||
// Reinitialise the WAL
|
||||
|
@ -448,14 +471,15 @@ mod tests {
|
|||
|
||||
assert_eq!(max_sequence_number, Some(SequenceNumber::new(43)));
|
||||
|
||||
// Assert the ops were pushed into the DmlSink exactly as generated.
|
||||
// Assert the ops were pushed into the DmlSink exactly as generated,
|
||||
// barring the empty op which is skipped
|
||||
let ops = mock_iter.sink.get_calls();
|
||||
assert_matches!(
|
||||
&*ops,
|
||||
&[
|
||||
IngestOp::Write(ref w1),
|
||||
IngestOp::Write(ref w2),
|
||||
IngestOp::Write(ref w3)
|
||||
IngestOp::Write(ref w3),
|
||||
] => {
|
||||
assert_write_ops_eq(w1.clone(), op1);
|
||||
assert_write_ops_eq(w2.clone(), op2);
|
||||
|
@ -493,9 +517,16 @@ mod tests {
|
|||
let ops = metrics
|
||||
.get_instrument::<Metric<U64Counter>>("ingester_wal_replay_ops")
|
||||
.expect("file counter not found")
|
||||
.get_observer(&Attributes::from([]))
|
||||
.get_observer(&Attributes::from(&[("outcome", "success")]))
|
||||
.expect("attributes not found")
|
||||
.fetch();
|
||||
assert_eq!(ops, 3);
|
||||
let ops = metrics
|
||||
.get_instrument::<Metric<U64Counter>>("ingester_wal_replay_ops")
|
||||
.expect("file counter not found")
|
||||
.get_observer(&Attributes::from(&[("outcome", "skipped_empty")]))
|
||||
.expect("attributes not found")
|
||||
.fetch();
|
||||
assert_eq!(ops, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ rand = { version = "0.8.3", features = ["small_rng"] }
|
|||
regex = "1.9"
|
||||
schema = { path = "../schema" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
toml = "0.7.6"
|
||||
|
|
|
@ -20,7 +20,7 @@ predicate = { path = "../predicate" }
|
|||
query_functions = { path = "../query_functions" }
|
||||
regex = "1"
|
||||
schema = { path = "../schema" }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
thiserror = "1.0"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ log = "0.4"
|
|||
parking_lot = "0.12"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
serde_urlencoded = "0.7.0"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
|
|
|
@ -240,6 +240,7 @@ pub async fn create_router_server_type(
|
|||
ingester_connections,
|
||||
router_config.rpc_write_replicas,
|
||||
&metrics,
|
||||
router_config.rpc_write_health_error_window_seconds,
|
||||
);
|
||||
let rpc_writer = InstrumentationDecorator::new("rpc_writer", &metrics, rpc_writer);
|
||||
|
||||
|
|
|
@ -130,15 +130,16 @@ impl<T> RpcWrite<T> {
|
|||
endpoints: impl IntoIterator<Item = (T, N)>,
|
||||
n_copies: NonZeroUsize,
|
||||
metrics: &metric::Registry,
|
||||
error_window: Duration,
|
||||
) -> Self
|
||||
where
|
||||
T: Send + Sync + Debug + 'static,
|
||||
N: Into<Arc<str>>,
|
||||
{
|
||||
let endpoints = Balancer::new(
|
||||
endpoints
|
||||
.into_iter()
|
||||
.map(|(client, name)| CircuitBreakingClient::new(client, name.into())),
|
||||
endpoints.into_iter().map(|(client, name)| {
|
||||
CircuitBreakingClient::new(client, name.into(), error_window)
|
||||
}),
|
||||
Some(metrics),
|
||||
);
|
||||
|
||||
|
@ -383,6 +384,7 @@ mod tests {
|
|||
|
||||
const NAMESPACE_NAME: &str = "bananas";
|
||||
const NAMESPACE_ID: NamespaceId = NamespaceId::new(42);
|
||||
const ARBITRARY_TEST_ERROR_WINDOW: Duration = Duration::from_secs(5);
|
||||
|
||||
// Start a new `NamespaceSchema` with only the given ID; the rest of the fields are arbitrary.
|
||||
fn new_empty_namespace_schema() -> Arc<NamespaceSchema> {
|
||||
|
@ -460,6 +462,7 @@ mod tests {
|
|||
[(Arc::clone(&client), "mock client")],
|
||||
1.try_into().unwrap(),
|
||||
&metric::Registry::default(),
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
);
|
||||
|
||||
// Drive the RPC writer
|
||||
|
@ -522,6 +525,7 @@ mod tests {
|
|||
],
|
||||
1.try_into().unwrap(),
|
||||
&metric::Registry::default(),
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
);
|
||||
|
||||
// Drive the RPC writer
|
||||
|
@ -590,6 +594,7 @@ mod tests {
|
|||
],
|
||||
1.try_into().unwrap(),
|
||||
&metric::Registry::default(),
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
);
|
||||
|
||||
// Drive the RPC writer
|
||||
|
@ -640,7 +645,10 @@ mod tests {
|
|||
circuit_1.set_healthy(false);
|
||||
|
||||
let got = make_request(
|
||||
[CircuitBreakingClient::new(client_1, "client_1").with_circuit_breaker(circuit_1)],
|
||||
[
|
||||
CircuitBreakingClient::new(client_1, "client_1", ARBITRARY_TEST_ERROR_WINDOW)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
],
|
||||
1,
|
||||
)
|
||||
.await;
|
||||
|
@ -661,7 +669,10 @@ mod tests {
|
|||
circuit_1.set_healthy(true);
|
||||
|
||||
let got = make_request(
|
||||
[CircuitBreakingClient::new(client_1, "client_1").with_circuit_breaker(circuit_1)],
|
||||
[
|
||||
CircuitBreakingClient::new(client_1, "client_1", ARBITRARY_TEST_ERROR_WINDOW)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
],
|
||||
1,
|
||||
)
|
||||
.await;
|
||||
|
@ -682,7 +693,10 @@ mod tests {
|
|||
circuit_1.set_healthy(true);
|
||||
|
||||
let got = make_request(
|
||||
[CircuitBreakingClient::new(client_1, "client_1").with_circuit_breaker(circuit_1)],
|
||||
[
|
||||
CircuitBreakingClient::new(client_1, "client_1", ARBITRARY_TEST_ERROR_WINDOW)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
],
|
||||
1,
|
||||
)
|
||||
.await;
|
||||
|
@ -714,8 +728,10 @@ mod tests {
|
|||
|
||||
let got = make_request(
|
||||
[
|
||||
CircuitBreakingClient::new(client_1, "client_1").with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(client_2, "client_2").with_circuit_breaker(circuit_2),
|
||||
CircuitBreakingClient::new(client_1, "client_1", ARBITRARY_TEST_ERROR_WINDOW)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(client_2, "client_2", ARBITRARY_TEST_ERROR_WINDOW)
|
||||
.with_circuit_breaker(circuit_2),
|
||||
],
|
||||
2, // 2 copies required
|
||||
)
|
||||
|
@ -738,10 +754,18 @@ mod tests {
|
|||
|
||||
let got = make_request(
|
||||
[
|
||||
CircuitBreakingClient::new(Arc::clone(&client_1), "client_1")
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(Arc::clone(&client_2), "client_2")
|
||||
.with_circuit_breaker(circuit_2),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_1),
|
||||
"client_1",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_2),
|
||||
"client_2",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_2),
|
||||
],
|
||||
2, // 2 copies required
|
||||
)
|
||||
|
@ -774,10 +798,18 @@ mod tests {
|
|||
circuit_2.set_healthy(true);
|
||||
|
||||
let mut clients = vec![
|
||||
CircuitBreakingClient::new(Arc::clone(&client_1), "client_1")
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(Arc::clone(&client_2), "client_2")
|
||||
.with_circuit_breaker(circuit_2),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_1),
|
||||
"client_1",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_2),
|
||||
"client_2",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_2),
|
||||
];
|
||||
|
||||
// The order should never affect the outcome.
|
||||
|
@ -820,10 +852,18 @@ mod tests {
|
|||
|
||||
let got = make_request(
|
||||
[
|
||||
CircuitBreakingClient::new(Arc::clone(&client_1), "client_1")
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(Arc::clone(&client_2), "client_2")
|
||||
.with_circuit_breaker(circuit_2),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_1),
|
||||
"client_1",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_2),
|
||||
"client_2",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_2),
|
||||
],
|
||||
2, // 2 copies required
|
||||
)
|
||||
|
@ -876,12 +916,24 @@ mod tests {
|
|||
circuit_3.set_healthy(true);
|
||||
|
||||
let mut clients = vec![
|
||||
CircuitBreakingClient::new(Arc::clone(&client_1), "client_1")
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(Arc::clone(&client_2), "client_2")
|
||||
.with_circuit_breaker(circuit_2),
|
||||
CircuitBreakingClient::new(Arc::clone(&client_3), "client_3")
|
||||
.with_circuit_breaker(circuit_3),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_1),
|
||||
"client_1",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_1),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_2),
|
||||
"client_2",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_2),
|
||||
CircuitBreakingClient::new(
|
||||
Arc::clone(&client_3),
|
||||
"client_3",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(circuit_3),
|
||||
];
|
||||
|
||||
// The order should never affect the outcome.
|
||||
|
@ -983,7 +1035,7 @@ mod tests {
|
|||
async move {
|
||||
let endpoints = upstreams.into_iter()
|
||||
.map(|(circuit, client)| {
|
||||
CircuitBreakingClient::new(client, "bananas")
|
||||
CircuitBreakingClient::new(client, "bananas",ARBITRARY_TEST_ERROR_WINDOW)
|
||||
.with_circuit_breaker(circuit)
|
||||
});
|
||||
|
||||
|
|
|
@ -219,6 +219,8 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
|
||||
const ARBITRARY_TEST_ERROR_WINDOW: Duration = Duration::from_secs(5);
|
||||
|
||||
/// No healthy nodes prevents a snapshot from being returned.
|
||||
#[tokio::test]
|
||||
async fn test_balancer_empty_iter() {
|
||||
|
@ -227,16 +229,22 @@ mod tests {
|
|||
let circuit_err_1 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_1.set_healthy(false);
|
||||
circuit_err_1.set_should_probe(false);
|
||||
let client_err_1 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
let client_err_1 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
|
||||
let circuit_err_2 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_2.set_healthy(false);
|
||||
circuit_err_2.set_should_probe(false);
|
||||
let client_err_2 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
let client_err_2 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
|
||||
assert_eq!(circuit_err_1.ok_count(), 0);
|
||||
assert_eq!(circuit_err_2.ok_count(), 0);
|
||||
|
@ -254,21 +262,31 @@ mod tests {
|
|||
let circuit_err_1 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_1.set_healthy(false);
|
||||
circuit_err_1.set_should_probe(true);
|
||||
let client_err_1 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
let client_err_1 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
|
||||
let circuit_err_2 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_2.set_healthy(false);
|
||||
circuit_err_2.set_should_probe(true);
|
||||
let client_err_2 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
let client_err_2 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
let circuit_ok = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_ok.set_healthy(true);
|
||||
circuit_ok.set_should_probe(false);
|
||||
let client_ok = CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok));
|
||||
let client_ok = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok));
|
||||
|
||||
let balancer = Balancer::new([client_err_1, client_err_2, client_ok], None);
|
||||
|
||||
|
@ -323,21 +341,31 @@ mod tests {
|
|||
let circuit_err_1 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_1.set_healthy(false);
|
||||
circuit_err_1.set_should_probe(false);
|
||||
let client_err_1 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
let client_err_1 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
|
||||
let circuit_err_2 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_2.set_healthy(false);
|
||||
circuit_err_2.set_should_probe(false);
|
||||
let client_err_2 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
let client_err_2 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
|
||||
let circuit_ok = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_ok.set_healthy(true);
|
||||
let client_ok = CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok));
|
||||
let client_ok = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok));
|
||||
|
||||
assert_eq!(circuit_ok.ok_count(), 0);
|
||||
assert_eq!(circuit_err_1.ok_count(), 0);
|
||||
|
@ -383,8 +411,12 @@ mod tests {
|
|||
let circuit = Arc::new(MockCircuitBreaker::default());
|
||||
circuit.set_healthy(false);
|
||||
circuit.set_should_probe(false);
|
||||
let client = CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit));
|
||||
let client = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit));
|
||||
|
||||
assert_eq!(circuit.ok_count(), 0);
|
||||
|
||||
|
@ -432,21 +464,30 @@ mod tests {
|
|||
// two returns a healthy state, one is unhealthy.
|
||||
let circuit_err = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err.set_healthy(false);
|
||||
let client_err =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err));
|
||||
let client_err = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err));
|
||||
|
||||
let circuit_ok_1 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_ok_1.set_healthy(true);
|
||||
let client_ok_1 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok_1));
|
||||
let client_ok_1 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok_1));
|
||||
|
||||
let circuit_ok_2 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_ok_2.set_healthy(true);
|
||||
let client_ok_2 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok_2));
|
||||
let client_ok_2 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bananas",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok_2));
|
||||
|
||||
let balancer = Balancer::new([client_err, client_ok_1, client_ok_2], None);
|
||||
|
||||
|
@ -478,22 +519,31 @@ mod tests {
|
|||
let circuit_err_1 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_1.set_healthy(false);
|
||||
circuit_err_1.set_should_probe(false);
|
||||
let client_err_1 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bad-client-1")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
let client_err_1 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bad-client-1",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_1));
|
||||
|
||||
let circuit_err_2 = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err_2.set_healthy(false);
|
||||
circuit_err_2.set_should_probe(true);
|
||||
let client_err_2 =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bad-client-2")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
let client_err_2 = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bad-client-2",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err_2));
|
||||
|
||||
let circuit_ok = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_ok.set_healthy(true);
|
||||
let client_ok =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "ok-client")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok));
|
||||
let client_ok = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"ok-client",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_ok));
|
||||
|
||||
let balancer = Balancer::new([client_err_1, client_err_2, client_ok], None);
|
||||
|
||||
|
@ -558,9 +608,12 @@ mod tests {
|
|||
let circuit_err = Arc::new(MockCircuitBreaker::default());
|
||||
circuit_err.set_healthy(false);
|
||||
circuit_err.set_should_probe(false);
|
||||
let client_err =
|
||||
CircuitBreakingClient::new(Arc::new(MockWriteClient::default()), "bad-client-1")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err));
|
||||
let client_err = CircuitBreakingClient::new(
|
||||
Arc::new(MockWriteClient::default()),
|
||||
"bad-client-1",
|
||||
ARBITRARY_TEST_ERROR_WINDOW,
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_err));
|
||||
|
||||
let balancer = Balancer::new([client_err], None);
|
||||
assert!(balancer.endpoints().is_none());
|
||||
|
|
|
@ -16,13 +16,10 @@ use tokio::{
|
|||
};
|
||||
|
||||
/// The limit on the ratio of the number of error requests to the number of
|
||||
/// successful requests within an [`ERROR_WINDOW`] interval to be considered
|
||||
/// healthy.
|
||||
/// successful requests within the configured error window to be considered
|
||||
/// healthy. If updating this value, remember to update the documentation
|
||||
/// in the CLI flag for the configurable error window.
|
||||
const MAX_ERROR_RATIO: f32 = 0.8;
|
||||
/// The (discrete) slices of time in which the error ratio must exceed
|
||||
/// [`MAX_ERROR_RATIO`] to cause the [`CircuitBreaker`] to transition to the
|
||||
/// unhealthy state.
|
||||
const ERROR_WINDOW: Duration = Duration::from_secs(5);
|
||||
/// The maximum number of probe requests to allow when in an unhealthy state.
|
||||
const NUM_PROBES: u64 = 10;
|
||||
/// The length of time during which up to [`NUM_PROBES`] are allowed when in an
|
||||
|
@ -64,7 +61,7 @@ const PROBE_INTERVAL: Duration = Duration::from_secs(1);
|
|||
/// # Implementation
|
||||
///
|
||||
/// The circuit breaker is considered unhealthy when 80% ([`MAX_ERROR_RATIO`])
|
||||
/// of requests within a 5 second window [`ERROR_WINDOW`] fail. The breaker
|
||||
/// of requests within the configured error window fail. The breaker
|
||||
/// becomes healthy again when the error rate falls below 80%
|
||||
/// ([`MAX_ERROR_RATIO`]) for the, at most, 10 probe requests ([`NUM_PROBES`])
|
||||
/// allowed through within 1 second ([`PROBE_INTERVAL`]).
|
||||
|
@ -80,7 +77,7 @@ const PROBE_INTERVAL: Duration = Duration::from_secs(1);
|
|||
/// breaker is considered healthy when the ratio of the number of error
|
||||
/// requests to the number of successful requests in the current window is
|
||||
/// less than [`MAX_ERROR_RATIO`]. If the ratio of errors exceeds
|
||||
/// [`MAX_ERROR_RATIO`] within a single [`ERROR_WINDOW`] duration of time,
|
||||
/// [`MAX_ERROR_RATIO`] within a single error window,
|
||||
/// the circuit breaker is then considered to be in the "open/unhealthy"
|
||||
/// state.
|
||||
///
|
||||
|
@ -102,12 +99,12 @@ const PROBE_INTERVAL: Duration = Duration::from_secs(1);
|
|||
///
|
||||
/// Successful requests and errors are recorded when passed to
|
||||
/// [`CircuitBreaker::observe()`]. These counters are reset at intervals of
|
||||
/// [`ERROR_WINDOW`], meaning that the ratio of errors must exceed
|
||||
/// the configured error window, meaning that the ratio of errors must exceed
|
||||
/// [`MAX_ERROR_RATIO`] within a single window to open the circuit breaker to
|
||||
/// start being considered unhealthy.
|
||||
///
|
||||
/// A floor of at least `MAX_ERROR_RATIO * NUM_PROBES` must be observed per
|
||||
/// [`ERROR_WINDOW`] before the circuit breaker opens / becomes unhealthy.
|
||||
/// A floor of at least [`MAX_ERROR_RATIO`] * [`NUM_PROBES`] must be observed per
|
||||
/// error window before the circuit breaker opens / becomes unhealthy.
|
||||
///
|
||||
/// Error ratios are measured on every call to [`CircuitBreaker::is_healthy`],
|
||||
/// which should be done before determining whether to perform each request.
|
||||
|
@ -121,8 +118,8 @@ const PROBE_INTERVAL: Duration = Duration::from_secs(1);
|
|||
///
|
||||
/// ## Probing / Closing (becoming healthy)
|
||||
///
|
||||
/// Once a circuit breaker transitions to "open/unhealthy", up to [`NUM_PROBES`]
|
||||
/// requests are allowed per [`PROBE_INTERVAL`], as determined by calling
|
||||
/// Once a circuit breaker transitions to "open/unhealthy", up to 10 [`NUM_PROBES`]
|
||||
/// requests are allowed per 1s [`PROBE_INTERVAL`], as determined by calling
|
||||
/// [`CircuitBreaker::should_probe`] before sending a request. This is referred
|
||||
/// to as "probing", allowing the client to discover the state of the
|
||||
/// (potentially unavailable) remote while bounding the number of requests that
|
||||
|
@ -145,7 +142,7 @@ const PROBE_INTERVAL: Duration = Duration::from_secs(1);
|
|||
#[derive(Debug)]
|
||||
pub struct CircuitBreaker {
|
||||
/// Counters tracking the number of [`Ok`] and [`Err`] observed in the
|
||||
/// current [`ERROR_WINDOW`].
|
||||
/// current error window.
|
||||
///
|
||||
/// When the total number of requests ([`RequestCounterValue::total()`]) is
|
||||
/// less than [`NUM_PROBES`], the circuit is in the "probing" regime. When
|
||||
|
@ -157,7 +154,7 @@ pub struct CircuitBreaker {
|
|||
/// should be allowed and resetting the [`PROBE_INTERVAL`].
|
||||
probes: Mutex<ProbeState>,
|
||||
|
||||
/// A task to reset the request count at intervals of [`ERROR_WINDOW`].
|
||||
/// A task to reset the request count at the configured error window.
|
||||
reset_task: JoinHandle<()>,
|
||||
|
||||
/// A string description of the endpoint this [`CircuitBreaker`] models.
|
||||
|
@ -178,13 +175,13 @@ struct ProbeState {
|
|||
}
|
||||
|
||||
impl CircuitBreaker {
|
||||
pub(crate) fn new(endpoint: impl Into<Arc<str>>) -> Self {
|
||||
pub(crate) fn new(endpoint: impl Into<Arc<str>>, error_window: Duration) -> Self {
|
||||
let requests = Arc::new(RequestCounter::default());
|
||||
let s = Self {
|
||||
requests: Arc::clone(&requests),
|
||||
probes: Mutex::new(ProbeState::default()),
|
||||
reset_task: tokio::spawn(async move {
|
||||
let mut ticker = tokio::time::interval(ERROR_WINDOW);
|
||||
let mut ticker = tokio::time::interval(error_window);
|
||||
ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
|
@ -366,7 +363,7 @@ fn is_healthy(counts: RequestCounterValue) -> bool {
|
|||
|
||||
/// Resets the absolute request counter values if the current circuit state is
|
||||
/// "closed" (healthy, not probing) at the time of the call, such that the there
|
||||
/// must be NUM_PROBES * MAX_ERROR_RATIO number of failed requests to open the
|
||||
/// must be [`NUM_PROBES`] * [`MAX_ERROR_RATIO`] number of failed requests to open the
|
||||
/// circuit (mark as unhealthy).
|
||||
///
|
||||
/// Retains the closed/healthy state of the circuit. This is NOT an atomic
|
||||
|
@ -466,7 +463,7 @@ mod tests {
|
|||
|
||||
/// Return a new [`CircuitBreaker`] with the reset ticker disabled.
|
||||
fn new_no_reset() -> CircuitBreaker {
|
||||
let c = CircuitBreaker::new("bananas");
|
||||
let c = CircuitBreaker::new("bananas", Duration::from_secs(5));
|
||||
c.reset_task.abort();
|
||||
c
|
||||
}
|
||||
|
@ -599,13 +596,13 @@ mod tests {
|
|||
}
|
||||
|
||||
/// The circuit is marked unhealthy if the error rate exceeds
|
||||
/// MAX_ERROR_RATIO within a single ERROR_WINDOW (approximately).
|
||||
/// MAX_ERROR_RATIO within a single error window (approximately).
|
||||
///
|
||||
/// This test ensures the counter reset logic prevents errors from different
|
||||
/// ERROR_WINDOW periods from changing the circuit to open/unhealthy.
|
||||
/// error window periods from changing the circuit to open/unhealthy.
|
||||
#[tokio::test]
|
||||
async fn test_periodic_counter_reset() {
|
||||
let c = CircuitBreaker::new("bananas");
|
||||
let c = CircuitBreaker::new("bananas", Duration::from_secs(5));
|
||||
|
||||
// Assert the circuit breaker as healthy.
|
||||
assert!(c.is_healthy());
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use std::{fmt::Debug, sync::Arc};
|
||||
use std::{fmt::Debug, sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use generated_types::influxdata::iox::ingester::v1::WriteRequest;
|
||||
|
@ -51,9 +51,13 @@ pub(super) struct CircuitBreakingClient<T, C = CircuitBreaker> {
|
|||
}
|
||||
|
||||
impl<T> CircuitBreakingClient<T> {
|
||||
pub(super) fn new(inner: T, endpoint_name: impl Into<Arc<str>>) -> Self {
|
||||
pub(super) fn new(
|
||||
inner: T,
|
||||
endpoint_name: impl Into<Arc<str>>,
|
||||
error_window: Duration,
|
||||
) -> Self {
|
||||
let endpoint_name = endpoint_name.into();
|
||||
let state = CircuitBreaker::new(Arc::clone(&endpoint_name));
|
||||
let state = CircuitBreaker::new(Arc::clone(&endpoint_name), error_window);
|
||||
state.set_healthy();
|
||||
Self {
|
||||
inner,
|
||||
|
@ -181,8 +185,12 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_healthy() {
|
||||
let circuit_breaker = Arc::new(MockCircuitBreaker::default());
|
||||
let wrapper = CircuitBreakingClient::new(MockWriteClient::default(), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_breaker));
|
||||
let wrapper = CircuitBreakingClient::new(
|
||||
MockWriteClient::default(),
|
||||
"bananas",
|
||||
Duration::from_secs(5),
|
||||
)
|
||||
.with_circuit_breaker(Arc::clone(&circuit_breaker));
|
||||
|
||||
circuit_breaker.set_healthy(true);
|
||||
assert_eq!(wrapper.is_healthy(), circuit_breaker.is_healthy());
|
||||
|
@ -213,8 +221,9 @@ mod tests {
|
|||
.into_iter(),
|
||||
)),
|
||||
);
|
||||
let wrapper = CircuitBreakingClient::new(Arc::clone(&mock_client), "bananas")
|
||||
.with_circuit_breaker(Arc::clone(&circuit_breaker));
|
||||
let wrapper =
|
||||
CircuitBreakingClient::new(Arc::clone(&mock_client), "bananas", Duration::from_secs(5))
|
||||
.with_circuit_breaker(Arc::clone(&circuit_breaker));
|
||||
|
||||
assert_eq!(circuit_breaker.ok_count(), 0);
|
||||
assert_eq!(circuit_breaker.err_count(), 0);
|
||||
|
|
|
@ -34,6 +34,7 @@ pub const TEST_RETENTION_PERIOD: Duration = Duration::from_secs(3600);
|
|||
pub struct TestContextBuilder {
|
||||
namespace_autocreation: MissingNamespaceAction,
|
||||
single_tenancy: bool,
|
||||
rpc_write_error_window: Duration,
|
||||
}
|
||||
|
||||
impl Default for TestContextBuilder {
|
||||
|
@ -41,6 +42,7 @@ impl Default for TestContextBuilder {
|
|||
Self {
|
||||
namespace_autocreation: MissingNamespaceAction::Reject,
|
||||
single_tenancy: false,
|
||||
rpc_write_error_window: Duration::from_secs(5),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -79,6 +81,7 @@ impl TestContextBuilder {
|
|||
self.single_tenancy,
|
||||
catalog,
|
||||
metrics,
|
||||
self.rpc_write_error_window,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
@ -94,6 +97,7 @@ pub struct TestContext {
|
|||
|
||||
namespace_autocreation: MissingNamespaceAction,
|
||||
single_tenancy: bool,
|
||||
rpc_write_error_window: Duration,
|
||||
}
|
||||
|
||||
// This mass of words is certainly a downside of chained handlers.
|
||||
|
@ -133,12 +137,14 @@ impl TestContext {
|
|||
single_tenancy: bool,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
metrics: Arc<metric::Registry>,
|
||||
rpc_write_error_window: Duration,
|
||||
) -> Self {
|
||||
let client = Arc::new(MockWriteClient::default());
|
||||
let rpc_writer = RpcWrite::new(
|
||||
[(Arc::clone(&client), "mock client")],
|
||||
1.try_into().unwrap(),
|
||||
&metrics,
|
||||
rpc_write_error_window,
|
||||
);
|
||||
|
||||
let ns_cache = Arc::new(ReadThroughCache::new(
|
||||
|
@ -195,6 +201,7 @@ impl TestContext {
|
|||
|
||||
namespace_autocreation,
|
||||
single_tenancy,
|
||||
rpc_write_error_window,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -208,6 +215,7 @@ impl TestContext {
|
|||
self.single_tenancy,
|
||||
catalog,
|
||||
metrics,
|
||||
self.rpc_write_error_window,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ bytes = "1.4"
|
|||
futures = "0.3"
|
||||
prost = "0.11"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
tonic = { workspace = true }
|
||||
|
|
|
@ -30,7 +30,7 @@ pin-project = "1.1"
|
|||
prost = "0.11"
|
||||
regex = "1.9.1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.103"
|
||||
serde_json = "1.0.104"
|
||||
snafu = "0.7"
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tonic = { workspace = true }
|
||||
|
|
|
@ -19,7 +19,7 @@ tokio = { version = "1.29", features = ["macros", "parking_lot", "sync", "time"]
|
|||
tokio-util = { version = "0.7.8" }
|
||||
trace = { path = "../trace"}
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
sysinfo = "0.29.6"
|
||||
sysinfo = "0.29.7"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.7.0"
|
||||
|
|
Loading…
Reference in New Issue