// File metadata from export tooling (not code): 1518 lines, 52 KiB, Rust.
use async_trait::async_trait;
|
|
use data_types::{SequenceNumber, ShardIndex};
|
|
use dml::{DmlMeta, DmlOperation, DmlWrite};
|
|
use futures::stream::BoxStream;
|
|
use std::{
|
|
collections::{BTreeMap, BTreeSet},
|
|
fmt::{Debug, Display, Formatter},
|
|
io::Error,
|
|
};
|
|
|
|
/// Generic boxed error type that is used in this crate.
///
/// The dynamic boxing makes it easier to deal with errors from different implementations.
#[derive(Debug)]
pub struct WriteBufferError {
    /// The boxed source error; boxing erases the concrete implementation's error type.
    inner: Box<dyn std::error::Error + Sync + Send>,

    /// Broad classification of the error, exposed via [`WriteBufferError::kind`] so callers
    /// can decide how to react (e.g. retry vs. abort).
    kind: WriteBufferErrorKind,
}
|
|
|
|
impl WriteBufferError {
    /// Create a new error from the given kind and any error that can be boxed.
    pub fn new(
        kind: WriteBufferErrorKind,
        e: impl Into<Box<dyn std::error::Error + Sync + Send>>,
    ) -> Self {
        Self {
            inner: e.into(),
            kind,
        }
    }

    /// Convenience constructor for [`WriteBufferErrorKind::InvalidData`].
    pub fn invalid_data(e: impl Into<Box<dyn std::error::Error + Sync + Send>>) -> Self {
        Self::new(WriteBufferErrorKind::InvalidData, e)
    }

    /// Convenience constructor for [`WriteBufferErrorKind::InvalidInput`].
    pub fn invalid_input(e: impl Into<Box<dyn std::error::Error + Sync + Send>>) -> Self {
        Self::new(WriteBufferErrorKind::InvalidInput, e)
    }

    /// Convenience constructor for [`WriteBufferErrorKind::SequenceNumberAfterWatermark`].
    pub fn sequence_number_after_watermark(
        e: impl Into<Box<dyn std::error::Error + Sync + Send>>,
    ) -> Self {
        Self::new(WriteBufferErrorKind::SequenceNumberAfterWatermark, e)
    }

    /// Convenience constructor for [`WriteBufferErrorKind::SequenceNumberNoLongerExists`].
    pub fn sequence_number_no_longer_exists(
        e: impl Into<Box<dyn std::error::Error + Sync + Send>>,
    ) -> Self {
        Self::new(WriteBufferErrorKind::SequenceNumberNoLongerExists, e)
    }

    /// Convenience constructor for [`WriteBufferErrorKind::Unknown`].
    pub fn unknown(e: impl Into<Box<dyn std::error::Error + Sync + Send>>) -> Self {
        Self::new(WriteBufferErrorKind::Unknown, e)
    }

    /// Returns the kind of error this was
    pub fn kind(&self) -> WriteBufferErrorKind {
        self.kind
    }

    /// Returns the inner error
    pub fn inner(&self) -> &dyn std::error::Error {
        self.inner.as_ref()
    }
}
|
|
|
|
impl Display for WriteBufferError {
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "WriteBufferError({:?}): {}", self.kind, self.inner)
|
|
}
|
|
}
|
|
|
|
// Marker impl: the default `source()` (returning `None`) is acceptable here because
// callers reach the wrapped error through `WriteBufferError::inner()` instead.
impl std::error::Error for WriteBufferError {}
|
|
|
|
impl From<std::io::Error> for WriteBufferError {
|
|
fn from(e: Error) -> Self {
|
|
Self {
|
|
inner: Box::new(e),
|
|
kind: WriteBufferErrorKind::IO,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<rskafka::client::error::Error> for WriteBufferError {
|
|
fn from(e: rskafka::client::error::Error) -> Self {
|
|
Self {
|
|
inner: Box::new(e),
|
|
kind: WriteBufferErrorKind::IO,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<rskafka::client::producer::Error> for WriteBufferError {
|
|
fn from(e: rskafka::client::producer::Error) -> Self {
|
|
Self {
|
|
inner: Box::new(e),
|
|
kind: WriteBufferErrorKind::IO,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<String> for WriteBufferError {
|
|
fn from(e: String) -> Self {
|
|
Self {
|
|
inner: e.into(),
|
|
kind: WriteBufferErrorKind::Unknown,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<&'static str> for WriteBufferError {
|
|
fn from(e: &'static str) -> Self {
|
|
Self {
|
|
inner: e.into(),
|
|
kind: WriteBufferErrorKind::Unknown,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Classification of a [`WriteBufferError`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum WriteBufferErrorKind {
    /// This operation failed for an unknown reason
    Unknown,

    /// This operation was provided with invalid input data
    InvalidInput,

    /// This operation encountered invalid data
    InvalidData,

    /// A fatal IO error occurred - non-fatal errors should be retried internally
    IO,

    /// The sequence number that we are trying to read is newer than high watermark.
    SequenceNumberAfterWatermark,

    /// The sequence number that we are trying to read no longer exists.
    ///
    /// The sequence number is known according to the high watermark but was either removed
    /// manually or due to the retention policy.
    SequenceNumberNoLongerExists,
}
|
|
|
|
/// Writing to a Write Buffer takes a [`DmlWrite`] and returns the [`DmlMeta`] for the
/// payload that was written
#[async_trait]
pub trait WriteBufferWriting: Sync + Send + Debug + 'static {
    /// List all known shard indexes/indices.
    ///
    /// This set is never empty.
    fn shard_indexes(&self) -> BTreeSet<ShardIndex>;

    /// Send a [`DmlOperation`] to the write buffer using the specified shard index.
    ///
    /// The [`dml::DmlMeta`] will be propagated where applicable
    ///
    /// This call may "async block" (i.e. be in a pending state) to accumulate multiple operations
    /// into a single batch. After this method returns the operation was actually written (i.e. it
    /// is NOT buffered any longer). You may use [`flush`](Self::flush) to trigger an early
    /// submission (e.g. before some linger time expired), which can be helpful for controlled
    /// shutdown.
    ///
    /// Returns the metadata that was written.
    async fn store_operation(
        &self,
        shard_index: ShardIndex,
        operation: DmlOperation,
    ) -> Result<DmlMeta, WriteBufferError>;

    /// Sends line protocol to the write buffer - primarily intended for testing
    async fn store_lp(
        &self,
        shard_index: ShardIndex,
        lp: &str,
        default_time: i64,
    ) -> Result<DmlMeta, WriteBufferError> {
        // Line-protocol parse failures are the caller's fault, hence `InvalidInput`.
        let tables = mutable_batch_lp::lines_to_batches(lp, default_time)
            .map_err(WriteBufferError::invalid_input)?;

        self.store_operation(
            shard_index,
            DmlOperation::Write(DmlWrite::new("test_db", tables, None, Default::default())),
        )
        .await
    }

    /// Flush all currently blocking store operations ([`store_operation`](Self::store_operation) /
    /// [`store_lp`](Self::store_lp)).
    ///
    /// This call is pending while outstanding data is being submitted and will return AFTER the
    /// flush completed. However you still need to poll the store operations to get the metadata
    /// for every write.
    async fn flush(&self) -> Result<(), WriteBufferError>;

    /// Return type (like `"mock"` or `"kafka"`) of this writer.
    fn type_name(&self) -> &'static str;
}
|
|
|
|
/// Handles a stream of a specific shard.
///
/// This can be used to consume data via a stream or to seek the stream to a given sequence number.
#[async_trait]
pub trait WriteBufferStreamHandler: Sync + Send + Debug + 'static {
    /// Stream that produces DML operations.
    ///
    /// Note that due to the mutable borrow, it is not possible to have multiple streams from the
    /// same [`WriteBufferStreamHandler`] instance at the same time. If all streams are dropped and
    /// requested again, the last sequence number of the old streams will be the start sequence
    /// number for the new streams. If you want to prevent that either create a new
    /// [`WriteBufferStreamHandler`] or use [`seek`](Self::seek).
    ///
    /// If the sequence number that the stream wants to read is unknown (either because it is in
    /// the future or because some retention policy removed it already), the stream will return an
    /// error with [`WriteBufferErrorKind::SequenceNumberAfterWatermark`] /
    /// [`WriteBufferErrorKind::SequenceNumberNoLongerExists`] and will end immediately.
    async fn stream(&mut self) -> BoxStream<'static, Result<DmlOperation, WriteBufferError>>;

    /// Seek shard to given sequence number. The next output of related streams will be an
    /// entry with at least the given sequence number (the actual sequence number might be skipped
    /// due to "holes" in the stream).
    ///
    /// Note that due to the mutable borrow, it is not possible to seek while streams exist.
    async fn seek(&mut self, sequence_number: SequenceNumber) -> Result<(), WriteBufferError>;

    /// Reset the shard to whatever is the earliest number available in the retained write
    /// buffer. Useful to restart if [`WriteBufferErrorKind::SequenceNumberNoLongerExists`] is
    /// returned from [`stream`](Self::stream) but that isn't a problem.
    fn reset_to_earliest(&mut self);
}
|
|
|
|
#[async_trait]
|
|
impl WriteBufferStreamHandler for Box<dyn WriteBufferStreamHandler> {
|
|
async fn stream(&mut self) -> BoxStream<'static, Result<DmlOperation, WriteBufferError>> {
|
|
self.as_mut().stream().await
|
|
}
|
|
|
|
async fn seek(&mut self, sequence_number: SequenceNumber) -> Result<(), WriteBufferError> {
|
|
self.as_mut().seek(sequence_number).await
|
|
}
|
|
|
|
fn reset_to_earliest(&mut self) {
|
|
self.as_mut().reset_to_earliest()
|
|
}
|
|
}
|
|
|
|
/// Produce streams (one per shard) of [`DmlWrite`]s.
#[async_trait]
pub trait WriteBufferReading: Sync + Send + Debug + 'static {
    /// List all known shard indexes/indices.
    ///
    /// This set is never empty.
    fn shard_indexes(&self) -> BTreeSet<ShardIndex>;

    /// Get stream handler for a dedicated shard.
    ///
    /// Handlers do NOT share any state (e.g. last sequence number).
    async fn stream_handler(
        &self,
        shard_index: ShardIndex,
    ) -> Result<Box<dyn WriteBufferStreamHandler>, WriteBufferError>;

    /// Get stream handlers for all known shards.
    async fn stream_handlers(
        &self,
    ) -> Result<BTreeMap<ShardIndex, Box<dyn WriteBufferStreamHandler>>, WriteBufferError> {
        let mut handlers = BTreeMap::new();

        // Sequentially create one handler per shard; the first failure aborts the whole call.
        for shard_index in self.shard_indexes() {
            handlers.insert(shard_index, self.stream_handler(shard_index).await?);
        }

        Ok(handlers)
    }

    /// Get high watermark (= what we believe is the next sequence number to be added).
    ///
    /// Can be used to calculate lag. Note that since the watermark is "next sequence number to
    /// be added", it starts at 0 and after the entry with sequence number 0 is added to the
    /// buffer, it is 1.
    async fn fetch_high_watermark(
        &self,
        shard_index: ShardIndex,
    ) -> Result<SequenceNumber, WriteBufferError>;

    /// Return type (like `"mock"` or `"kafka"`) of this reader.
    fn type_name(&self) -> &'static str;
}
|
|
|
|
pub mod test_utils {
|
|
//! Generic tests for all write buffer implementations.
|
|
use super::{
|
|
WriteBufferError, WriteBufferReading, WriteBufferStreamHandler, WriteBufferWriting,
|
|
};
|
|
use crate::core::WriteBufferErrorKind;
|
|
use async_trait::async_trait;
|
|
use data_types::{PartitionKey, SequenceNumber, ShardIndex};
|
|
use dml::{test_util::assert_write_op_eq, DmlMeta, DmlOperation, DmlWrite};
|
|
use futures::{stream::FuturesUnordered, Stream, StreamExt, TryStreamExt};
|
|
use iox_time::{Time, TimeProvider};
|
|
use std::{
|
|
collections::{BTreeSet, HashSet},
|
|
convert::TryFrom,
|
|
num::NonZeroU32,
|
|
sync::Arc,
|
|
time::Duration,
|
|
};
|
|
use trace::{ctx::SpanContext, span::Span, RingBufferTraceCollector};
|
|
use uuid::Uuid;
|
|
|
|
/// Generated random topic name for testing.
|
|
pub fn random_topic_name() -> String {
|
|
format!("test_topic_{}", Uuid::new_v4())
|
|
}
|
|
|
|
/// Adapter to make a concrete write buffer implementation work w/ [`perform_generic_tests`].
#[async_trait]
pub trait TestAdapter: Send + Sync {
    /// The context type that is used.
    type Context: TestContext;

    /// Create a new context.
    ///
    /// This will be called multiple times during the test suite. Each resulting context must
    /// represent an isolated environment.
    async fn new_context(&self, n_shards: NonZeroU32) -> Self::Context {
        // Default: use the real system clock.
        self.new_context_with_time(n_shards, Arc::new(iox_time::SystemProvider::new()))
            .await
    }

    /// Create a new context that uses the given time provider.
    ///
    /// Same isolation requirements as [`new_context`](Self::new_context); used by tests that
    /// need a controllable (mock) clock.
    async fn new_context_with_time(
        &self,
        n_shards: NonZeroU32,
        time_provider: Arc<dyn TimeProvider>,
    ) -> Self::Context;
}
|
|
|
|
/// Context used during testing.
///
/// Represents an isolated environment. Actions like shard creations and writes must not
/// leak across context boundaries.
#[async_trait]
pub trait TestContext: Send + Sync {
    /// Write buffer writer implementation specific to this context and adapter.
    type Writing: WriteBufferWriting;

    /// Write buffer reader implementation specific to this context and adapter.
    type Reading: WriteBufferReading;

    /// Create new writer.
    ///
    /// NOTE(review): `creation_config` presumably controls whether missing shards/topics are
    /// auto-created (cf. `test_shard_auto_creation` in the suite) — confirm against
    /// implementations.
    async fn writing(&self, creation_config: bool) -> Result<Self::Writing, WriteBufferError>;

    /// Create new reader.
    ///
    /// NOTE(review): `creation_config` — see [`writing`](Self::writing).
    async fn reading(&self, creation_config: bool) -> Result<Self::Reading, WriteBufferError>;

    /// Trace collector that is used in this context.
    fn trace_collector(&self) -> Arc<RingBufferTraceCollector>;
}
|
|
|
|
/// Generic test suite that must be passed by all proper write buffer implementations.
///
/// See [`TestAdapter`] for how to make a concrete write buffer implementation work with this
/// test suite.
///
/// Note that you might need more tests on top of this to assert specific implementation
/// behaviors, edge cases, and error handling.
pub async fn perform_generic_tests<T>(adapter: T)
where
    T: TestAdapter,
{
    // Sub-tests run sequentially; each creates its own isolated context via the adapter.
    test_single_stream_io(&adapter).await;
    test_multi_stream_io(&adapter).await;
    test_multi_shard_io(&adapter).await;
    test_multi_writer_multi_reader(&adapter).await;
    test_seek(&adapter).await;
    test_reset_to_earliest(&adapter).await;
    test_watermark(&adapter).await;
    test_timestamp(&adapter).await;
    test_timestamp_batching(&adapter).await;
    test_shard_auto_creation(&adapter).await;
    test_shard_indexes(&adapter).await;
    test_span_context(&adapter).await;
    test_unknown_shard_write(&adapter).await;
    test_multi_namespaces(&adapter).await;
    test_flush(&adapter).await;
}
|
|
|
|
/// Writes line protocol and returns the [`DmlWrite`] that was written
|
|
pub async fn write(
|
|
namespace: &str,
|
|
writer: &impl WriteBufferWriting,
|
|
lp: &str,
|
|
shard_index: ShardIndex,
|
|
partition_key: PartitionKey,
|
|
span_context: Option<&SpanContext>,
|
|
) -> DmlWrite {
|
|
let tables = mutable_batch_lp::lines_to_batches(lp, 0).unwrap();
|
|
let write = DmlWrite::new(
|
|
namespace,
|
|
tables,
|
|
Some(partition_key),
|
|
DmlMeta::unsequenced(span_context.cloned()),
|
|
);
|
|
let operation = DmlOperation::Write(write);
|
|
|
|
let meta = writer
|
|
.store_operation(shard_index, operation.clone())
|
|
.await
|
|
.unwrap();
|
|
|
|
let mut write = match operation {
|
|
DmlOperation::Write(write) => write,
|
|
_ => unreachable!(),
|
|
};
|
|
|
|
write.set_meta(meta);
|
|
write
|
|
}
|
|
|
|
/// Test IO with a single writer and single reader stream.
///
/// This tests that:
///
/// - streams process data in order
/// - readers can handle the "pending" state w/o erroring
/// - readers unblock after being in "pending" state
async fn test_single_stream_io<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;

    let entry_1 = "upc user=1 100";
    let entry_2 = "upc user=2 200";
    let entry_3 = "upc user=3 300";

    let writer = context.writing(true).await.unwrap();
    let reader = context.reading(true).await.unwrap();

    // single-shard context => pop the only shard index
    let shard_index = set_pop_first(&mut reader.shard_indexes()).unwrap();
    let mut stream_handler = reader.stream_handler(shard_index).await.unwrap();
    let mut stream = stream_handler.stream().await;

    // empty stream is pending
    assert_stream_pending(&mut stream).await;

    // adding content allows us to get results
    let w1 = write(
        "namespace",
        &writer,
        entry_1,
        shard_index,
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w1);

    // stream is pending again
    assert_stream_pending(&mut stream).await;

    // adding more data unblocks the stream
    let w2 = write(
        "namespace",
        &writer,
        entry_2,
        shard_index,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w3 = write(
        "namespace",
        &writer,
        entry_3,
        shard_index,
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w2);
    assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w3);

    // stream is pending again
    assert_stream_pending(&mut stream).await;
}
|
|
|
|
/// Tests multiple subsequently created streams from a single [`WriteBufferStreamHandler`].
|
|
///
|
|
/// This tests that:
|
|
///
|
|
/// - readers remember their sequence number (and "pending" state) even when streams are dropped
|
|
/// - state is not shared between handlers
|
|
async fn test_multi_stream_io<T>(adapter: &T)
|
|
where
|
|
T: TestAdapter,
|
|
{
|
|
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
|
|
|
|
let entry_1 = "upc user=1 100";
|
|
let entry_2 = "upc user=2 200";
|
|
let entry_3 = "upc user=3 300";
|
|
|
|
let writer = context.writing(true).await.unwrap();
|
|
let reader = context.reading(true).await.unwrap();
|
|
|
|
let shard_index = ShardIndex::new(0);
|
|
|
|
let w1 = write(
|
|
"namespace",
|
|
&writer,
|
|
entry_1,
|
|
shard_index,
|
|
PartitionKey::from("bananas"),
|
|
None,
|
|
)
|
|
.await;
|
|
let w2 = write(
|
|
"namespace",
|
|
&writer,
|
|
entry_2,
|
|
shard_index,
|
|
PartitionKey::from("bananas"),
|
|
None,
|
|
)
|
|
.await;
|
|
let w3 = write(
|
|
"namespace",
|
|
&writer,
|
|
entry_3,
|
|
shard_index,
|
|
PartitionKey::from("bananas"),
|
|
None,
|
|
)
|
|
.await;
|
|
|
|
// creating stream, drop stream, re-create it => still starts at first entry
|
|
let shard_index = set_pop_first(&mut reader.shard_indexes()).unwrap();
|
|
let mut stream_handler = reader.stream_handler(shard_index).await.unwrap();
|
|
let stream = stream_handler.stream();
|
|
drop(stream);
|
|
let mut stream = stream_handler.stream().await;
|
|
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w1);
|
|
|
|
// re-creating stream after reading remembers sequence number, but wait a bit to provoke
|
|
// the stream to buffer some entries
|
|
tokio::time::sleep(Duration::from_millis(10)).await;
|
|
drop(stream);
|
|
let mut stream = stream_handler.stream().await;
|
|
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w2);
|
|
assert_write_op_eq(&stream.next().await.unwrap().unwrap(), &w3);
|
|
|
|
// re-creating stream after reading everything makes it pending
|
|
drop(stream);
|
|
let mut stream = stream_handler.stream().await;
|
|
assert_stream_pending(&mut stream).await;
|
|
|
|
// use a different handler => stream starts from beginning
|
|
let mut stream_handler2 = reader.stream_handler(shard_index).await.unwrap();
|
|
let mut stream2 = stream_handler2.stream().await;
|
|
assert_write_op_eq(&stream2.next().await.unwrap().unwrap(), &w1);
|
|
assert_stream_pending(&mut stream).await;
|
|
}
|
|
|
|
/// Test single reader-writer IO w/ multiple shards.
///
/// This tests that:
///
/// - writes go to and reads come from the right shard, aka that shards provide a
///   namespace-like isolation
/// - "pending" states are specific to a shard
async fn test_multi_shard_io<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(2).unwrap()).await;

    let entry_1 = "upc user=1 100";
    let entry_2 = "upc user=2 200";
    let entry_3 = "upc user=3 300";

    let writer = context.writing(true).await.unwrap();
    let reader = context.reading(true).await.unwrap();

    // check that we have two different shard indexes
    let mut shard_indexes = reader.shard_indexes();
    assert_eq!(shard_indexes.len(), 2);
    let shard_index_1 = set_pop_first(&mut shard_indexes).unwrap();
    let shard_index_2 = set_pop_first(&mut shard_indexes).unwrap();
    assert_ne!(shard_index_1, shard_index_2);

    let mut stream_handler_1 = reader.stream_handler(shard_index_1).await.unwrap();
    let mut stream_handler_2 = reader.stream_handler(shard_index_2).await.unwrap();
    let mut stream_1 = stream_handler_1.stream().await;
    let mut stream_2 = stream_handler_2.stream().await;

    // empty streams are pending
    assert_stream_pending(&mut stream_1).await;
    assert_stream_pending(&mut stream_2).await;

    // entries arrive at the right target stream
    let w1 = write(
        "namespace",
        &writer,
        entry_1,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    assert_write_op_eq(&stream_1.next().await.unwrap().unwrap(), &w1);
    assert_stream_pending(&mut stream_2).await;

    let w2 = write(
        "namespace",
        &writer,
        entry_2,
        shard_index_2,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    assert_stream_pending(&mut stream_1).await;
    assert_write_op_eq(&stream_2.next().await.unwrap().unwrap(), &w2);

    let w3 = write(
        "namespace",
        &writer,
        entry_3,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    assert_stream_pending(&mut stream_2).await;
    assert_write_op_eq(&stream_1.next().await.unwrap().unwrap(), &w3);

    // streams are pending again
    assert_stream_pending(&mut stream_1).await;
    assert_stream_pending(&mut stream_2).await;
}
|
|
|
|
/// Test multiple writers and multiple readers on multiple shards
///
/// This tests that:
///
/// - writers retrieve consistent shard indexes
/// - writes go to and reads come from the right shard, similar
///   to [`test_multi_shard_io`] but less detailed
/// - multiple writers can write to a single shard
async fn test_multi_writer_multi_reader<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(2).unwrap()).await;

    let entry_east_1 = "upc,region=east user=1 100";
    let entry_east_2 = "upc,region=east user=2 200";
    let entry_west_1 = "upc,region=west user=1 200";

    let writer_1 = context.writing(true).await.unwrap();
    let writer_2 = context.writing(true).await.unwrap();
    let reader_1 = context.reading(true).await.unwrap();
    let reader_2 = context.reading(true).await.unwrap();

    // both writers must agree on the shard set
    let mut shard_indexes_1 = writer_1.shard_indexes();
    let shard_indexes_2 = writer_2.shard_indexes();
    assert_eq!(shard_indexes_1, shard_indexes_2);
    assert_eq!(shard_indexes_1.len(), 2);
    let shard_index_1 = set_pop_first(&mut shard_indexes_1).unwrap();
    let shard_index_2 = set_pop_first(&mut shard_indexes_1).unwrap();

    let w_east_1 = write(
        "namespace",
        &writer_1,
        entry_east_1,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w_west_1 = write(
        "namespace",
        &writer_1,
        entry_west_1,
        shard_index_2,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w_east_2 = write(
        "namespace",
        &writer_2,
        entry_east_2,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    let mut handler_1_1 = reader_1.stream_handler(shard_index_1).await.unwrap();
    let mut handler_1_2 = reader_1.stream_handler(shard_index_2).await.unwrap();
    let mut handler_2_1 = reader_2.stream_handler(shard_index_1).await.unwrap();
    let mut handler_2_2 = reader_2.stream_handler(shard_index_2).await.unwrap();

    // both readers see the same per-shard content
    assert_reader_content(&mut handler_1_1, &[&w_east_1, &w_east_2]).await;
    assert_reader_content(&mut handler_1_2, &[&w_west_1]).await;
    assert_reader_content(&mut handler_2_1, &[&w_east_1, &w_east_2]).await;
    assert_reader_content(&mut handler_2_2, &[&w_west_1]).await;
}
|
|
|
|
/// Test seek implementation of readers.
///
/// This tests that:
///
/// - seeking is specific to the reader AND shard
/// - forward and backwards seeking works
/// - seeking past the end of the known content works (results in "pending" status and
///   remembers sequence number and not just "next entry")
async fn test_seek<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(2).unwrap()).await;

    let entry_east_1 = "upc,region=east user=1 100";
    let entry_east_2 = "upc,region=east user=2 200";
    let entry_east_3 = "upc,region=east user=3 300";
    let entry_west_1 = "upc,region=west user=1 200";

    let writer = context.writing(true).await.unwrap();

    let mut shard_indexes = writer.shard_indexes();
    let shard_index_1 = set_pop_first(&mut shard_indexes).unwrap();
    let shard_index_2 = set_pop_first(&mut shard_indexes).unwrap();

    let w_east_1 = write(
        "namespace",
        &writer,
        entry_east_1,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w_east_2 = write(
        "namespace",
        &writer,
        entry_east_2,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w_west_1 = write(
        "namespace",
        &writer,
        entry_west_1,
        shard_index_2,
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    let reader_1 = context.reading(true).await.unwrap();
    let reader_2 = context.reading(true).await.unwrap();

    let mut handler_1_1_a = reader_1.stream_handler(shard_index_1).await.unwrap();
    let mut handler_1_2_a = reader_1.stream_handler(shard_index_2).await.unwrap();
    let mut handler_1_1_b = reader_1.stream_handler(shard_index_1).await.unwrap();
    let mut handler_1_2_b = reader_1.stream_handler(shard_index_2).await.unwrap();
    let mut handler_2_1 = reader_2.stream_handler(shard_index_1).await.unwrap();
    let mut handler_2_2 = reader_2.stream_handler(shard_index_2).await.unwrap();

    // forward seek
    handler_1_1_a
        .seek(w_east_2.meta().sequence().unwrap().sequence_number)
        .await
        .unwrap();

    // only the handler that was seeked skips w_east_1; all others are unaffected
    assert_reader_content(&mut handler_1_1_a, &[&w_east_2]).await;
    assert_reader_content(&mut handler_1_2_a, &[&w_west_1]).await;
    assert_reader_content(&mut handler_1_1_b, &[&w_east_1, &w_east_2]).await;
    assert_reader_content(&mut handler_1_2_b, &[&w_west_1]).await;
    assert_reader_content(&mut handler_2_1, &[&w_east_1, &w_east_2]).await;
    assert_reader_content(&mut handler_2_2, &[&w_west_1]).await;

    // backward seek
    handler_1_1_a.seek(SequenceNumber::new(0)).await.unwrap();
    assert_reader_content(&mut handler_1_1_a, &[&w_east_1, &w_east_2]).await;

    // seek to far end and then add data
    // The affected stream should error and then stop. The other streams should still be
    // pending.
    let err = handler_1_1_a
        .seek(SequenceNumber::new(1_000_000))
        .await
        .expect_err("seeking into the future should be impossible");
    assert_eq!(
        err.kind(),
        WriteBufferErrorKind::SequenceNumberAfterWatermark
    );

    // NOTE(review): writes via `ShardIndex::new(0)` — presumably the same shard as
    // `shard_index_1` (the assertions below treat it as such); confirm.
    let w_east_3 = write(
        "namespace",
        &writer,
        entry_east_3,
        ShardIndex::new(0),
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    assert_stream_pending(&mut handler_1_2_a.stream().await).await;
    assert_reader_content(&mut handler_1_1_b, &[&w_east_3]).await;
    assert_stream_pending(&mut handler_1_2_b.stream().await).await;
    assert_reader_content(&mut handler_2_1, &[&w_east_3]).await;
    assert_stream_pending(&mut handler_2_2.stream().await).await;

    // seeking again should recover the stream
    handler_1_1_a.seek(SequenceNumber::new(0)).await.unwrap();
    assert_reader_content(&mut handler_1_1_a, &[&w_east_1, &w_east_2, &w_east_3]).await;
}
|
|
|
|
/// Test reset to earliest implementation of readers.
///
/// This tests that:
///
/// - Calling the function jumps to the earliest available sequence number if the earliest
///   available sequence number is earlier than the current sequence number
/// - Calling the function jumps to the earliest available sequence number if the earliest
///   available sequence number is later than the current sequence number
async fn test_reset_to_earliest<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(2).unwrap()).await;

    let entry_east_1 = "upc,region=east user=1 100";
    let entry_east_2 = "upc,region=east user=2 200";

    let writer = context.writing(true).await.unwrap();

    let mut shard_indexes = writer.shard_indexes();
    let shard_index_1 = set_pop_first(&mut shard_indexes).unwrap();

    let w_east_1 = write(
        "namespace",
        &writer,
        entry_east_1,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w_east_2 = write(
        "namespace",
        &writer,
        entry_east_2,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    let reader_1 = context.reading(true).await.unwrap();

    let mut handler_1_1_a = reader_1.stream_handler(shard_index_1).await.unwrap();

    // forward seek past the first entry
    handler_1_1_a
        .seek(w_east_2.meta().sequence().unwrap().sequence_number)
        .await
        .unwrap();
    assert_reader_content(&mut handler_1_1_a, &[&w_east_2]).await;

    // reset to earliest goes back to 0; stream re-fetches earliest record
    handler_1_1_a.reset_to_earliest();
    assert_reader_content(&mut handler_1_1_a, &[&w_east_1, &w_east_2]).await;

    // TODO: https://github.com/influxdata/influxdb_iox/issues/4651
    // Remove first write operation to simulate retention policies evicting some records
    // reset to earliest goes to whatever's available
}
|
|
|
|
/// Test watermark fetching.
///
/// This tests that:
///
/// - watermarks for empty shards is 0
/// - watermarks for non-empty shards is "last sequence number plus 1"
async fn test_watermark<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(2).unwrap()).await;

    let entry_east_1 = "upc,region=east user=1 100";
    let entry_east_2 = "upc,region=east user=2 200";
    let entry_west_1 = "upc,region=west user=1 200";

    let writer = context.writing(true).await.unwrap();
    let reader = context.reading(true).await.unwrap();

    let mut shard_indexes = writer.shard_indexes();
    let shard_index_1 = set_pop_first(&mut shard_indexes).unwrap();
    let shard_index_2 = set_pop_first(&mut shard_indexes).unwrap();

    // start at watermark 0
    assert_eq!(
        reader.fetch_high_watermark(shard_index_1).await.unwrap(),
        SequenceNumber::new(0),
    );
    assert_eq!(
        reader.fetch_high_watermark(shard_index_2).await.unwrap(),
        SequenceNumber::new(0)
    );

    // high water mark moves
    write(
        "namespace",
        &writer,
        entry_east_1,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w1 = write(
        "namespace",
        &writer,
        entry_east_2,
        shard_index_1,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let w2 = write(
        "namespace",
        &writer,
        entry_west_1,
        shard_index_2,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    // watermark is "last sequence number + 1", per shard
    assert_eq!(
        reader.fetch_high_watermark(shard_index_1).await.unwrap(),
        w1.meta().sequence().unwrap().sequence_number + 1
    );

    assert_eq!(
        reader.fetch_high_watermark(shard_index_2).await.unwrap(),
        w2.meta().sequence().unwrap().sequence_number + 1
    );
}
|
|
|
|
/// Test that timestamps reported by the readers are sane.
async fn test_timestamp<T>(adapter: &T)
where
    T: TestAdapter,
{
    // Note: Roundtrips are only guaranteed for millisecond-precision
    let t0 = Time::from_timestamp_millis(129);
    let time = Arc::new(iox_time::MockProvider::new(t0));
    let context = adapter
        .new_context_with_time(
            NonZeroU32::try_from(1).unwrap(),
            Arc::<iox_time::MockProvider>::clone(&time),
        )
        .await;

    let entry = "upc user=1 100";

    let writer = context.writing(true).await.unwrap();
    let reader = context.reading(true).await.unwrap();

    let mut shard_indexes = writer.shard_indexes();
    assert_eq!(shard_indexes.len(), 1);
    let shard_index = set_pop_first(&mut shard_indexes).unwrap();

    let write = write(
        "namespace",
        &writer,
        entry,
        shard_index,
        PartitionKey::from("bananas"),
        None,
    )
    .await;
    let reported_ts = write.meta().producer_ts().unwrap();

    // advance time
    time.inc(Duration::from_secs(10));

    // check that the timestamp records the ingestion time, not the read time
    let mut handler = reader.stream_handler(shard_index).await.unwrap();
    let sequenced_entry = handler.stream().await.next().await.unwrap().unwrap();
    let ts_entry = sequenced_entry.meta().producer_ts().unwrap();
    assert_eq!(ts_entry, t0);
    assert_eq!(reported_ts, t0);
}
|
|
|
|
/// Test that batching multiple messages to the same partition and
/// shard correctly preserves the timestamps
///
/// Coverage of <https://github.com/influxdata/conductor/issues/1000>
async fn test_timestamp_batching<T>(adapter: &T)
where
    T: TestAdapter,
{
    // Note: Roundtrips are only guaranteed for millisecond-precision
    let t0 = Time::from_timestamp_millis(129);
    let time_provider = Arc::new(iox_time::MockProvider::new(t0));
    let context = adapter
        .new_context_with_time(
            NonZeroU32::try_from(1).unwrap(),
            Arc::clone(&time_provider) as _,
        )
        .await;

    let writer = context.writing(true).await.unwrap();
    let reader = context.reading(true).await.unwrap();

    // Single shard, so all three writes below land in one stream.
    let shard_index = set_pop_first(&mut writer.shard_indexes()).unwrap();

    let bananas_key = PartitionKey::from("bananas");
    let platanos_key = PartitionKey::from("platanos");

    // Two ops with the same partition keys, first write at time 100
    // (each `set(inc(..))` call advances the mock clock by 100ms).
    time_provider.set(time_provider.inc(Duration::from_millis(100)));
    write(
        "ns1",
        &writer,
        "table foo=1",
        shard_index,
        bananas_key.clone(),
        None,
    )
    .await;

    // second write @ time 200
    time_provider.set(time_provider.inc(Duration::from_millis(100)));
    write(
        "ns1",
        &writer,
        "table foo=1",
        shard_index,
        bananas_key.clone(),
        None,
    )
    .await;

    // third write @ time 300, to a different partition key
    time_provider.set(time_provider.inc(Duration::from_millis(100)));
    write(
        "ns1",
        &writer,
        "table foo=1",
        shard_index,
        platanos_key.clone(),
        None,
    )
    .await;
    // NOTE(review): dropping the writer presumably flushes any batched ops
    // to the reader — confirm against the implementation's Drop behavior.
    drop(writer);

    // now at time 400
    time_provider.set(time_provider.inc(Duration::from_millis(100)));

    let mut handler = reader.stream_handler(shard_index).await.unwrap();

    let mut stream = handler.stream().await;

    // First op: written at t=100, read at t=400 => 300ms since production.
    let dml_op = stream.next().await.unwrap().unwrap();
    assert_eq!(partition_key(&dml_op), Some(&bananas_key));
    assert_eq!(
        dml_op
            .meta()
            .duration_since_production(time_provider.as_ref()),
        Some(Duration::from_millis(300))
    );

    // Second op: written at t=200 => 200ms since production; batching must
    // not have collapsed its timestamp into the first op's.
    let dml_op = stream.next().await.unwrap().unwrap();
    assert_eq!(partition_key(&dml_op), Some(&bananas_key));
    assert_eq!(
        dml_op
            .meta()
            .duration_since_production(time_provider.as_ref()),
        Some(Duration::from_millis(200))
    );

    // Third op: written at t=300 => 100ms since production.
    let dml_op = stream.next().await.unwrap().unwrap();
    assert_eq!(partition_key(&dml_op), Some(&platanos_key));
    assert_eq!(
        dml_op
            .meta()
            .duration_since_production(time_provider.as_ref()),
        Some(Duration::from_millis(100))
    );
}
|
|
|
|
/// Test that shard auto-creation works.
|
|
///
|
|
/// This tests that:
|
|
///
|
|
/// - both writer and reader cannot be constructed when shards are missing
|
|
/// - both writer and reader can be auto-create shards
|
|
async fn test_shard_auto_creation<T>(adapter: &T)
|
|
where
|
|
T: TestAdapter,
|
|
{
|
|
// fail when shards are missing
|
|
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
|
|
context.writing(false).await.unwrap_err();
|
|
context.reading(false).await.unwrap_err();
|
|
|
|
// writer can create shards
|
|
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
|
|
context.writing(true).await.unwrap();
|
|
context.writing(false).await.unwrap();
|
|
context.reading(false).await.unwrap();
|
|
|
|
// reader can create shards
|
|
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
|
|
context.reading(true).await.unwrap();
|
|
context.reading(false).await.unwrap();
|
|
context.writing(false).await.unwrap();
|
|
}
|
|
|
|
/// Test shard indexes' reporting of readers and writers.
|
|
///
|
|
/// This tests that:
|
|
///
|
|
/// - all shards are reported
|
|
async fn test_shard_indexes<T>(adapter: &T)
|
|
where
|
|
T: TestAdapter,
|
|
{
|
|
let n_shards = 10;
|
|
let context = adapter
|
|
.new_context(NonZeroU32::try_from(n_shards).unwrap())
|
|
.await;
|
|
|
|
let writer_1 = context.writing(true).await.unwrap();
|
|
let writer_2 = context.writing(true).await.unwrap();
|
|
let reader_1 = context.reading(true).await.unwrap();
|
|
let reader_2 = context.reading(true).await.unwrap();
|
|
|
|
let shard_indexes_1 = writer_1.shard_indexes();
|
|
let shard_indexes_2 = writer_2.shard_indexes();
|
|
let shard_indexes_3 = reader_1.shard_indexes();
|
|
let shard_indexes_4 = reader_2.shard_indexes();
|
|
assert_eq!(shard_indexes_1.len(), n_shards as usize);
|
|
assert_eq!(shard_indexes_1, shard_indexes_2);
|
|
assert_eq!(shard_indexes_1, shard_indexes_3);
|
|
assert_eq!(shard_indexes_1, shard_indexes_4);
|
|
}
|
|
|
|
/// Test that span contexts are propagated through the system.
async fn test_span_context<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;

    let entry = "upc user=1 100";

    let writer = context.writing(true).await.unwrap();
    let reader = context.reading(true).await.unwrap();

    let mut shard_indexes = writer.shard_indexes();
    assert_eq!(shard_indexes.len(), 1);
    let shard_index = set_pop_first(&mut shard_indexes).unwrap();
    let mut handler = reader.stream_handler(shard_index).await.unwrap();
    let mut stream = handler.stream().await;

    // 1: no context
    write(
        "namespace",
        &writer,
        entry,
        shard_index,
        PartitionKey::from("bananas"),
        None,
    )
    .await;

    // check write 1: a context-less write must surface no span context
    let write_1 = stream.next().await.unwrap().unwrap();
    assert!(write_1.meta().span_context().is_none());

    // no spans emitted yet
    let collector = context.trace_collector();
    assert!(collector.spans().is_empty());

    // 2: some context
    let span_context_1 = SpanContext::new(Arc::clone(&collector) as Arc<_>);
    write(
        "namespace",
        &writer,
        entry,
        shard_index,
        PartitionKey::from("bananas"),
        Some(&span_context_1),
    )
    .await;

    // 3: another context, this one a child of a parent context
    let span_context_parent = SpanContext::new(Arc::clone(&collector) as Arc<_>);
    let span_context_2 = span_context_parent.child("foo").ctx;
    write(
        "namespace",
        &writer,
        entry,
        shard_index,
        PartitionKey::from("bananas"),
        Some(&span_context_2),
    )
    .await;

    // check write 2: context must come back equal to, or linked to, the one sent
    let write_2 = stream.next().await.unwrap().unwrap();
    let actual_context_1 = write_2.meta().span_context().unwrap();
    assert_span_context_eq_or_linked(&span_context_1, actual_context_1, collector.spans());

    // check write 3
    let write_3 = stream.next().await.unwrap().unwrap();
    let actual_context_2 = write_3.meta().span_context().unwrap();
    assert_span_context_eq_or_linked(&span_context_2, actual_context_2, collector.spans());

    // check that links / parents make sense
    assert_span_relations_closed(&collector.spans(), &[span_context_1, span_context_2]);
}
|
|
|
|
/// Test that writing to an unknown shard produces an error
|
|
async fn test_unknown_shard_write<T>(adapter: &T)
|
|
where
|
|
T: TestAdapter,
|
|
{
|
|
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
|
|
|
|
let tables = mutable_batch_lp::lines_to_batches("upc user=1 100", 0).unwrap();
|
|
let write = DmlWrite::new("foo", tables, Some("bananas".into()), Default::default());
|
|
let operation = DmlOperation::Write(write);
|
|
|
|
let writer = context.writing(true).await.unwrap();
|
|
|
|
// flip bits to get an unknown shard index
|
|
let shard_index =
|
|
ShardIndex::new(!set_pop_first(&mut writer.shard_indexes()).unwrap().get());
|
|
writer
|
|
.store_operation(shard_index, operation)
|
|
.await
|
|
.unwrap_err();
|
|
}
|
|
|
|
/// Test usage w/ multiple namespaces.
|
|
///
|
|
/// Tests that:
|
|
///
|
|
/// - namespace names or propagated correctly from writer to reader
|
|
/// - all namespaces end up in a single stream
|
|
async fn test_multi_namespaces<T>(adapter: &T)
|
|
where
|
|
T: TestAdapter,
|
|
{
|
|
let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;
|
|
|
|
let entry_1 = "upc,region=east user=1 100";
|
|
let entry_2 = "upc,region=east user=2 200";
|
|
|
|
let writer = context.writing(true).await.unwrap();
|
|
let reader = context.reading(true).await.unwrap();
|
|
|
|
let mut shard_indexes = writer.shard_indexes();
|
|
assert_eq!(shard_indexes.len(), 1);
|
|
let shard_index = set_pop_first(&mut shard_indexes).unwrap();
|
|
|
|
let w1 = write(
|
|
"namespace_1",
|
|
&writer,
|
|
entry_2,
|
|
shard_index,
|
|
PartitionKey::from("bananas"),
|
|
None,
|
|
)
|
|
.await;
|
|
let w2 = write(
|
|
"namespace_2",
|
|
&writer,
|
|
entry_1,
|
|
shard_index,
|
|
PartitionKey::from("bananas"),
|
|
None,
|
|
)
|
|
.await;
|
|
|
|
let mut handler = reader.stream_handler(shard_index).await.unwrap();
|
|
assert_reader_content(&mut handler, &[&w1, &w2]).await;
|
|
}
|
|
|
|
/// Dummy test to ensure that flushing somewhat works.
async fn test_flush<T>(adapter: &T)
where
    T: TestAdapter,
{
    let context = adapter.new_context(NonZeroU32::try_from(1).unwrap()).await;

    let writer = Arc::new(context.writing(true).await.unwrap());

    let mut shard_indexes = writer.shard_indexes();
    assert_eq!(shard_indexes.len(), 1);
    let shard_index = set_pop_first(&mut shard_indexes).unwrap();

    // Build 20 concurrent write futures, each sharing the writer via Arc.
    let mut write_tasks: FuturesUnordered<_> = (0..20)
        .map(|i| {
            let writer = Arc::clone(&writer);

            async move {
                let entry = format!("upc,region=east user={} {}", i, i);

                write(
                    "ns",
                    writer.as_ref(),
                    &entry,
                    shard_index,
                    PartitionKey::from("bananas"),
                    None,
                )
                .await;
            }
        })
        .collect();

    // Drive all write futures to completion on a background task.
    let write_tasks = tokio::spawn(async move { while write_tasks.next().await.is_some() {} });

    // Give the spawned writes a chance to become in-flight before flushing.
    tokio::time::sleep(Duration::from_millis(1)).await;

    writer.flush().await.unwrap();

    // After the flush all pending writes must complete; fail if they don't
    // finish within one second (outer unwrap: timeout, inner: join result).
    tokio::time::timeout(Duration::from_millis(1_000), write_tasks)
        .await
        .unwrap()
        .unwrap();
}
|
|
|
|
/// Assert that the content of the reader is as expected.
|
|
///
|
|
/// This will read `expected_writes.len()` from the reader and then ensures that the stream is
|
|
/// pending.
|
|
async fn assert_reader_content(
|
|
actual_stream_handler: &mut Box<dyn WriteBufferStreamHandler>,
|
|
expected_writes: &[&DmlWrite],
|
|
) {
|
|
let actual_stream = actual_stream_handler.stream().await;
|
|
|
|
// we need to limit the stream to `expected_writes.len()` elements, otherwise it might be
|
|
// pending forever
|
|
let actual_writes: Vec<_> = actual_stream
|
|
.take(expected_writes.len())
|
|
.try_collect()
|
|
.await
|
|
.unwrap();
|
|
|
|
assert_eq!(actual_writes.len(), expected_writes.len());
|
|
for (actual, expected) in actual_writes.iter().zip(expected_writes.iter()) {
|
|
assert_write_op_eq(actual, expected);
|
|
}
|
|
|
|
// Ensure that stream is pending
|
|
let mut actual_stream = actual_stream_handler.stream().await;
|
|
assert_stream_pending(&mut actual_stream).await;
|
|
}
|
|
|
|
/// Asserts that given span context are the same or that `second` links back to `first`.
|
|
///
|
|
/// "Same" means:
|
|
///
|
|
/// - identical trace ID
|
|
/// - identical span ID
|
|
/// - identical parent span ID
|
|
pub(crate) fn assert_span_context_eq_or_linked(
|
|
first: &SpanContext,
|
|
second: &SpanContext,
|
|
spans: Vec<Span>,
|
|
) {
|
|
// search for links
|
|
for span in spans {
|
|
if (span.ctx.trace_id == second.trace_id) && (span.ctx.span_id == second.span_id) {
|
|
// second context was emitted as span
|
|
|
|
// check if it links to first context
|
|
for (trace_id, span_id) in span.ctx.links {
|
|
if (trace_id == first.trace_id) && (span_id == first.span_id) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// no link found
|
|
assert_eq!(first.trace_id, second.trace_id);
|
|
assert_eq!(first.span_id, second.span_id);
|
|
assert_eq!(first.parent_span_id, second.parent_span_id);
|
|
}
|
|
|
|
/// Assert that all span relations (parents, links) are found within the set of spans or within
|
|
/// the set of roots.
|
|
fn assert_span_relations_closed(spans: &[Span], roots: &[SpanContext]) {
|
|
let all_ids: HashSet<_> = spans
|
|
.iter()
|
|
.map(|span| (span.ctx.trace_id, span.ctx.span_id))
|
|
.chain(roots.iter().map(|ctx| (ctx.trace_id, ctx.span_id)))
|
|
.collect();
|
|
|
|
for span in spans {
|
|
if let Some(parent_span_id) = span.ctx.parent_span_id {
|
|
assert!(all_ids.contains(&(span.ctx.trace_id, parent_span_id)));
|
|
}
|
|
for link in &span.ctx.links {
|
|
assert!(all_ids.contains(link));
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Assert that given stream is pending.
///
/// This will try to poll the stream for a bit to ensure that async IO has a chance to
/// catch up.
async fn assert_stream_pending<S>(stream: &mut S)
where
    S: Stream + Send + Unpin,
    S::Item: std::fmt::Debug,
{
    // Race the stream against a short sleep: if the stream yields anything
    // within 10ms it was not pending, which is a test failure.
    tokio::select! {
        e = stream.next() => panic!("stream is not pending, yielded: {e:?}"),
        _ = tokio::time::sleep(Duration::from_millis(10)) => {},
    };
}
|
|
|
|
/// Pops first entry from set.
///
/// Helper until <https://github.com/rust-lang/rust/issues/62924> is stable.
pub(crate) fn set_pop_first<T>(set: &mut BTreeSet<T>) -> Option<T>
where
    T: Clone + Ord,
{
    // Clone the smallest key (short-circuiting on an empty set), then
    // remove-and-return the stored value by key.
    let first = set.iter().next()?.clone();
    set.take(&first)
}
|
|
|
|
/// Get the testing Kafka connection string or return current scope.
///
/// If `TEST_INTEGRATION` and `KAFKA_CONNECT` are set, return the Kafka connection URL to the
/// caller.
///
/// If `TEST_INTEGRATION` is set but `KAFKA_CONNECT` is not set, fail the tests and provide
/// guidance for setting `KAFKA_CONNECT`.
///
/// If `TEST_INTEGRATION` is not set, skip the calling test by returning early.
#[macro_export]
macro_rules! maybe_skip_kafka_integration {
    // No-argument form: delegate with an empty panic message, i.e. skip quietly.
    () => {
        maybe_skip_kafka_integration!("")
    };
    ($panic_msg:expr) => {{
        use std::env;
        // Load a `.env` file if present; ignore errors when there is none.
        dotenvy::dotenv().ok();

        let panic_msg: &'static str = $panic_msg;

        match (
            env::var("TEST_INTEGRATION").is_ok(),
            env::var("KAFKA_CONNECT").ok(),
        ) {
            // Integration tests requested and Kafka location known: hand it back.
            (true, Some(kafka_connection)) => kafka_connection,
            // Integration tests requested but no Kafka location: hard failure with guidance.
            (true, None) => {
                panic!(
                    "TEST_INTEGRATION is set which requires running integration tests, but \
                    KAFKA_CONNECT is not set. Please run Kafka, perhaps by using the command \
                    `docker-compose -f docker/ci-kafka-docker-compose.yml up kafka`, then \
                    set KAFKA_CONNECT to the host and port where Kafka is accessible. If \
                    running the `docker-compose` command and the Rust tests on the host, the \
                    value for `KAFKA_CONNECT` should be `localhost:9093`. If running the Rust \
                    tests in another container in the `docker-compose` network as on CI, \
                    `KAFKA_CONNECT` should be `kafka:9092`."
                )
            }
            // Integration tests not requested: skip (or panic if a message was supplied).
            (false, Some(_)) => {
                eprintln!("skipping Kafka integration tests - set TEST_INTEGRATION to run");
                if !panic_msg.is_empty() {
                    panic!("{}", panic_msg);
                } else {
                    // `return` exits the *calling* test function, skipping it.
                    return;
                }
            }
            (false, None) => {
                eprintln!(
                    "skipping Kafka integration tests - set TEST_INTEGRATION and KAFKA_CONNECT \
                    to run"
                );
                if !panic_msg.is_empty() {
                    panic!("{}", panic_msg);
                } else {
                    return;
                }
            }
        }
    }};
}
|
|
|
|
fn partition_key(dml_op: &DmlOperation) -> Option<&PartitionKey> {
|
|
match dml_op {
|
|
DmlOperation::Write(w) => w.partition_key(),
|
|
DmlOperation::Delete(_) => None,
|
|
}
|
|
}
|
|
}
|