Merge pull request #5908 from influxdata/dom/ingester-integration-tests
test: ingester integration testspull/24376/head
commit
66035ada48
|
|
@ -435,192 +435,20 @@ impl<T> Drop for IngestHandlerImpl<T> {
|
|||
mod tests {
|
||||
use std::{num::NonZeroU32, ops::DerefMut};
|
||||
|
||||
use data_types::{Namespace, NamespaceSchema, QueryPool, Sequence, SequenceNumber};
|
||||
use data_types::{Namespace, NamespaceSchema, Sequence, SequenceNumber};
|
||||
use dml::{DmlMeta, DmlWrite};
|
||||
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
|
||||
use iox_time::Time;
|
||||
use metric::{Attributes, Metric, U64Counter, U64Gauge};
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use object_store::memory::InMemory;
|
||||
use test_helpers::maybe_start_logging;
|
||||
use write_buffer::mock::{MockBufferForReading, MockBufferSharedState};
|
||||
|
||||
use super::*;
|
||||
use crate::data::{partition::SnapshotBatch, table::TableName};
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_from_write_buffer_write_to_mutable_buffer() {
|
||||
let ingester = TestIngester::new().await;
|
||||
|
||||
let schema = NamespaceSchema::new(
|
||||
ingester.namespace.id,
|
||||
ingester.topic.id,
|
||||
ingester.query_pool.id,
|
||||
100,
|
||||
);
|
||||
let mut txn = ingester.catalog.start_transaction().await.unwrap();
|
||||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
||||
let ingest_ts2 = Time::from_timestamp_millis(1337);
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
Some("1970-01-01".into()),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(0)),
|
||||
ingest_ts1,
|
||||
None,
|
||||
50,
|
||||
),
|
||||
);
|
||||
let schema = validate_or_insert_schema(w1.tables(), &schema, txn.deref_mut())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
ingester.write_buffer_state.push_write(w1);
|
||||
let w2 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("cpu bar=2 20\ncpu bar=3 30", 0).unwrap(),
|
||||
Some("1970-01-01".into()),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(7)),
|
||||
ingest_ts2,
|
||||
None,
|
||||
150,
|
||||
),
|
||||
);
|
||||
let _schema = validate_or_insert_schema(w2.tables(), &schema, txn.deref_mut())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
ingester.write_buffer_state.push_write(w2);
|
||||
let w3 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("a b=2 200", 0).unwrap(),
|
||||
Some("1970-01-01".into()),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(9)),
|
||||
ingest_ts2,
|
||||
None,
|
||||
150,
|
||||
),
|
||||
);
|
||||
let _schema = validate_or_insert_schema(w3.tables(), &schema, txn.deref_mut())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
ingester.write_buffer_state.push_write(w3);
|
||||
txn.commit().await.unwrap();
|
||||
|
||||
// give the writes some time to go through the buffer. Exit once we've verified there's
|
||||
// data in there from both writes.
|
||||
tokio::time::timeout(Duration::from_secs(2), async {
|
||||
let ns_name = ingester.namespace.name.into();
|
||||
let table_name = TableName::from("a");
|
||||
loop {
|
||||
let mut has_measurement = false;
|
||||
|
||||
if let Some(data) = ingester.ingester.data.shard(ingester.shard.id) {
|
||||
if let Some(data) = data.namespace(&ns_name) {
|
||||
// verify there's data in the buffer
|
||||
if let Some((b, _)) = data.snapshot(&table_name, &"1970-01-01".into()).await
|
||||
{
|
||||
if let Some(b) = b.first() {
|
||||
if b.data.num_rows() > 0 {
|
||||
has_measurement = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// and ensure that the shard state was actually updated
|
||||
let shard = ingester
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.shards()
|
||||
.create_or_get(&ingester.topic, ingester.shard_index)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
if has_measurement
|
||||
&& shard.min_unpersisted_sequence_number == SequenceNumber::new(9)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
}
|
||||
})
|
||||
.await
|
||||
.expect("timeout");
|
||||
|
||||
let observation = ingester
|
||||
.metrics
|
||||
.get_instrument::<Metric<DurationHistogram>>("ingester_op_apply_duration")
|
||||
.unwrap()
|
||||
.get_observer(&Attributes::from(&[
|
||||
("kafka_topic", "whatevs"),
|
||||
("kafka_partition", "0"),
|
||||
("result", "success"),
|
||||
]))
|
||||
.unwrap()
|
||||
.fetch();
|
||||
let hits = observation.buckets.iter().map(|b| b.count).sum::<u64>();
|
||||
assert_eq!(hits, 3);
|
||||
|
||||
let observation = ingester
|
||||
.metrics
|
||||
.get_instrument::<Metric<U64Counter>>("ingester_write_buffer_read_bytes")
|
||||
.unwrap()
|
||||
.get_observer(&Attributes::from(&[
|
||||
("kafka_topic", "whatevs"),
|
||||
("kafka_partition", "0"),
|
||||
]))
|
||||
.unwrap()
|
||||
.fetch();
|
||||
assert_eq!(observation, 350);
|
||||
|
||||
let observation = ingester
|
||||
.metrics
|
||||
.get_instrument::<Metric<U64Gauge>>("ingester_write_buffer_last_sequence_number")
|
||||
.unwrap()
|
||||
.get_observer(&Attributes::from(&[
|
||||
("kafka_topic", "whatevs"),
|
||||
("kafka_partition", "0"),
|
||||
]))
|
||||
.unwrap()
|
||||
.fetch();
|
||||
assert_eq!(observation, 9);
|
||||
|
||||
let observation = ingester
|
||||
.metrics
|
||||
.get_instrument::<Metric<U64Gauge>>("ingester_write_buffer_sequence_number_lag")
|
||||
.unwrap()
|
||||
.get_observer(&Attributes::from(&[
|
||||
("kafka_topic", "whatevs"),
|
||||
("kafka_partition", "0"),
|
||||
]))
|
||||
.unwrap()
|
||||
.fetch();
|
||||
assert_eq!(observation, 0);
|
||||
|
||||
let observation = ingester
|
||||
.metrics
|
||||
.get_instrument::<Metric<U64Gauge>>("ingester_write_buffer_last_ingest_ts")
|
||||
.unwrap()
|
||||
.get_observer(&Attributes::from(&[
|
||||
("kafka_topic", "whatevs"),
|
||||
("kafka_partition", "0"),
|
||||
]))
|
||||
.unwrap()
|
||||
.fetch();
|
||||
assert_eq!(observation, ingest_ts2.timestamp_nanos() as u64);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_shutdown() {
|
||||
let ingester = TestIngester::new().await.ingester;
|
||||
let (ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||
|
||||
// does not exit w/o shutdown
|
||||
tokio::select! {
|
||||
|
|
@ -638,7 +466,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
#[should_panic(expected = "Background worker 'bad_task' exited early!")]
|
||||
async fn test_join_task_early_shutdown() {
|
||||
let mut ingester = TestIngester::new().await.ingester;
|
||||
let (mut ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||
|
||||
let shutdown_task = tokio::spawn(async {
|
||||
// It does nothing! and stops.
|
||||
|
|
@ -655,7 +483,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
#[should_panic(expected = "JoinError::Panic")]
|
||||
async fn test_join_task_panic() {
|
||||
let mut ingester = TestIngester::new().await.ingester;
|
||||
let (mut ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||
|
||||
let shutdown_task = tokio::spawn(async {
|
||||
panic!("bananas");
|
||||
|
|
@ -754,88 +582,6 @@ mod tests {
|
|||
(ingester, shard, namespace)
|
||||
}
|
||||
|
||||
async fn verify_ingester_buffer_has_data(
|
||||
ingester: IngestHandlerImpl,
|
||||
shard: Shard,
|
||||
namespace: Namespace,
|
||||
custom_batch_verification: impl Fn(&SnapshotBatch) + Send,
|
||||
) {
|
||||
// give the writes some time to go through the buffer. Exit once we've verified there's
|
||||
// data in there
|
||||
tokio::time::timeout(Duration::from_secs(1), async move {
|
||||
let ns_name = namespace.name.into();
|
||||
let table_name = TableName::from("cpu");
|
||||
loop {
|
||||
let mut has_measurement = false;
|
||||
|
||||
if let Some(data) = ingester.data.shard(shard.id) {
|
||||
if let Some(data) = data.namespace(&ns_name) {
|
||||
// verify there's data in the buffer
|
||||
if let Some((b, _)) = data.snapshot(&table_name, &"1970-01-01".into()).await
|
||||
{
|
||||
if let Some(b) = b.first() {
|
||||
custom_batch_verification(b);
|
||||
|
||||
if b.data.num_rows() == 1 {
|
||||
has_measurement = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if has_measurement {
|
||||
break;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
}
|
||||
})
|
||||
.await
|
||||
.expect("timeout");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn seeks_on_initialization() {
|
||||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
||||
let ingest_ts2 = Time::from_timestamp_millis(1337);
|
||||
let write_operations = vec![
|
||||
DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("cpu bar=2 20", 0).unwrap(),
|
||||
Some("1970-01-01".into()),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(1)),
|
||||
ingest_ts1,
|
||||
None,
|
||||
150,
|
||||
),
|
||||
),
|
||||
DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("cpu bar=2 30", 0).unwrap(),
|
||||
Some("1970-01-01".into()),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(2)),
|
||||
ingest_ts2,
|
||||
None,
|
||||
150,
|
||||
),
|
||||
),
|
||||
];
|
||||
|
||||
let (ingester, shard, namespace) = ingester_test_setup(write_operations, 2, false).await;
|
||||
|
||||
verify_ingester_buffer_has_data(ingester, shard, namespace, |first_batch| {
|
||||
if first_batch.min_sequence_number == SequenceNumber::new(1) {
|
||||
panic!(
|
||||
"initialization did a seek to the beginning rather than the min_unpersisted"
|
||||
);
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic(expected = "JoinError::Panic")]
|
||||
async fn sequence_number_no_longer_exists() {
|
||||
|
|
@ -912,41 +658,9 @@ mod tests {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn skip_to_oldest_available() {
|
||||
maybe_start_logging();
|
||||
|
||||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
||||
let write_operations = vec![DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("cpu bar=2 20", 0).unwrap(),
|
||||
Some("1970-01-01".into()),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(10)),
|
||||
ingest_ts1,
|
||||
None,
|
||||
150,
|
||||
),
|
||||
)];
|
||||
|
||||
// Set the min unpersisted to something bigger than the write's sequence number to
|
||||
// cause an UnknownSequenceNumber error. Skip to oldest available = true, so ingester
|
||||
// should find data
|
||||
let (ingester, shard, namespace) = ingester_test_setup(write_operations, 1, true).await;
|
||||
|
||||
verify_ingester_buffer_has_data(ingester, shard, namespace, |first_batch| {
|
||||
assert_eq!(
|
||||
first_batch.min_sequence_number,
|
||||
SequenceNumber::new(10),
|
||||
"re-initialization didn't seek to the beginning",
|
||||
);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn limits_concurrent_queries() {
|
||||
let mut ingester = TestIngester::new().await;
|
||||
let (mut ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||
let request = IngesterQueryRequest {
|
||||
namespace: "foo".to_string(),
|
||||
table: "cpu".to_string(),
|
||||
|
|
@ -954,93 +668,14 @@ mod tests {
|
|||
predicate: None,
|
||||
};
|
||||
|
||||
let res = ingester.ingester.query(request.clone()).await.unwrap_err();
|
||||
let res = ingester.query(request.clone()).await.unwrap_err();
|
||||
assert!(matches!(
|
||||
res,
|
||||
crate::querier_handler::Error::NamespaceNotFound { .. }
|
||||
));
|
||||
|
||||
ingester.ingester.request_sem = Semaphore::new(0);
|
||||
let res = ingester.ingester.query(request).await.unwrap_err();
|
||||
ingester.request_sem = Semaphore::new(0);
|
||||
let res = ingester.query(request).await.unwrap_err();
|
||||
assert!(matches!(res, crate::querier_handler::Error::RequestLimit));
|
||||
}
|
||||
|
||||
struct TestIngester {
|
||||
catalog: Arc<dyn Catalog>,
|
||||
shard: Shard,
|
||||
namespace: Namespace,
|
||||
topic: TopicMetadata,
|
||||
shard_index: ShardIndex,
|
||||
query_pool: QueryPool,
|
||||
metrics: Arc<metric::Registry>,
|
||||
write_buffer_state: MockBufferSharedState,
|
||||
ingester: IngestHandlerImpl,
|
||||
}
|
||||
|
||||
impl TestIngester {
|
||||
async fn new() -> Self {
|
||||
let metrics: Arc<metric::Registry> = Default::default();
|
||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
let mut txn = catalog.start_transaction().await.unwrap();
|
||||
let topic = txn.topics().create_or_get("whatevs").await.unwrap();
|
||||
let query_pool = txn.query_pools().create_or_get("whatevs").await.unwrap();
|
||||
let shard_index = ShardIndex::new(0);
|
||||
let namespace = txn
|
||||
.namespaces()
|
||||
.create("foo", "inf", topic.id, query_pool.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let shard = txn
|
||||
.shards()
|
||||
.create_or_get(&topic, shard_index)
|
||||
.await
|
||||
.unwrap();
|
||||
txn.commit().await.unwrap();
|
||||
|
||||
let mut shard_states = BTreeMap::new();
|
||||
shard_states.insert(shard_index, shard);
|
||||
|
||||
let write_buffer_state =
|
||||
MockBufferSharedState::empty_with_n_shards(NonZeroU32::try_from(1).unwrap());
|
||||
let reading: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state.clone(), None).unwrap());
|
||||
let object_store = Arc::new(InMemory::new());
|
||||
|
||||
let lifecycle_config = LifecycleConfig::new(
|
||||
1000000,
|
||||
1000,
|
||||
1000,
|
||||
Duration::from_secs(10),
|
||||
Duration::from_secs(10),
|
||||
10000000,
|
||||
);
|
||||
let ingester = IngestHandlerImpl::new(
|
||||
lifecycle_config,
|
||||
topic.clone(),
|
||||
shard_states,
|
||||
Arc::clone(&catalog),
|
||||
object_store,
|
||||
reading,
|
||||
Arc::new(Executor::new(1)),
|
||||
Arc::clone(&metrics),
|
||||
false,
|
||||
1,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Self {
|
||||
catalog,
|
||||
shard,
|
||||
namespace,
|
||||
topic,
|
||||
shard_index,
|
||||
query_pool,
|
||||
metrics,
|
||||
write_buffer_state,
|
||||
ingester,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ pub struct LifecycleConfig {
|
|||
impl LifecycleConfig {
|
||||
/// Initialize a new LifecycleConfig. panics if the passed `pause_ingest_size` is less than the
|
||||
/// `persist_memory_threshold`.
|
||||
pub fn new(
|
||||
pub const fn new(
|
||||
pause_ingest_size: usize,
|
||||
persist_memory_threshold: usize,
|
||||
partition_size_threshold: usize,
|
||||
|
|
|
|||
|
|
@ -2,15 +2,15 @@
|
|||
|
||||
use std::{pin::Pin, sync::Arc};
|
||||
|
||||
use arrow::{error::ArrowError, record_batch::RecordBatch};
|
||||
use arrow::{array::new_null_array, error::ArrowError, record_batch::RecordBatch};
|
||||
use arrow_util::optimize::{optimize_record_batch, optimize_schema};
|
||||
use data_types::{PartitionId, SequenceNumber};
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion_util::MemoryStream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use futures::{Stream, StreamExt, TryStreamExt};
|
||||
use generated_types::ingester::IngesterQueryRequest;
|
||||
use observability_deps::tracing::debug;
|
||||
use schema::selection::Selection;
|
||||
use schema::{merge::SchemaMerger, selection::Selection};
|
||||
use snafu::{ensure, Snafu};
|
||||
|
||||
use crate::{
|
||||
|
|
@ -168,10 +168,63 @@ impl IngesterQueryResponse {
|
|||
})
|
||||
.boxed()
|
||||
}
|
||||
|
||||
/// Convert [`IngesterQueryResponse`] to a set of [`RecordBatch`]es.
|
||||
///
|
||||
/// If the response contains multiple snapshots, this will merge the schemas into a single one and create
|
||||
/// NULL-columns for snapshots that miss columns.
|
||||
///
|
||||
/// # Panic
|
||||
/// Panics if there are no batches returned at all. Also panics if the snapshot-scoped schemas do not line up with
|
||||
/// the snapshot-scoped record batches.
|
||||
pub async fn into_record_batches(self) -> Vec<RecordBatch> {
|
||||
let mut snapshot_schema = None;
|
||||
let mut schema_merger = SchemaMerger::new();
|
||||
let mut batches = vec![];
|
||||
|
||||
let mut stream = self.flatten();
|
||||
while let Some(msg) = stream.try_next().await.unwrap() {
|
||||
match msg {
|
||||
FlatIngesterQueryResponse::StartPartition { .. } => (),
|
||||
FlatIngesterQueryResponse::RecordBatch { batch } => {
|
||||
let last_schema = snapshot_schema.as_ref().unwrap();
|
||||
assert_eq!(&batch.schema(), last_schema);
|
||||
batches.push(batch);
|
||||
}
|
||||
FlatIngesterQueryResponse::StartSnapshot { schema } => {
|
||||
snapshot_schema = Some(Arc::clone(&schema));
|
||||
|
||||
schema_merger = schema_merger
|
||||
.merge(&schema::Schema::try_from(schema).unwrap())
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(!batches.is_empty());
|
||||
|
||||
// equalize schemas
|
||||
let common_schema = schema_merger.build().as_arrow();
|
||||
batches
|
||||
.into_iter()
|
||||
.map(|batch| {
|
||||
let batch_schema = batch.schema();
|
||||
let columns = common_schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| match batch_schema.index_of(field.name()) {
|
||||
Ok(idx) => Arc::clone(batch.column(idx)),
|
||||
Err(_) => new_null_array(field.data_type(), batch.num_rows()),
|
||||
})
|
||||
.collect();
|
||||
RecordBatch::try_new(Arc::clone(&common_schema), columns).unwrap()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Flattened version of [`IngesterQueryResponse`].
|
||||
pub(crate) type FlatIngesterQueryResponseStream =
|
||||
pub type FlatIngesterQueryResponseStream =
|
||||
Pin<Box<dyn Stream<Item = Result<FlatIngesterQueryResponse, ArrowError>> + Send>>;
|
||||
|
||||
/// Element within the flat wire protocol.
|
||||
|
|
@ -347,17 +400,15 @@ fn prepare_data_to_querier_for_partition(
|
|||
mod tests {
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use arrow::{array::new_null_array, datatypes::SchemaRef, record_batch::RecordBatch};
|
||||
use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
use assert_matches::assert_matches;
|
||||
use datafusion::{
|
||||
physical_plan::RecordBatchStream,
|
||||
prelude::{col, lit},
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
use predicate::Predicate;
|
||||
use schema::merge::SchemaMerger;
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::{make_ingester_data, DataLocation, TEST_NAMESPACE, TEST_TABLE};
|
||||
|
|
@ -490,8 +541,11 @@ mod tests {
|
|||
];
|
||||
for (loc, scenario) in &scenarios {
|
||||
println!("Location: {loc:?}");
|
||||
let stream = prepare_data_to_querier(scenario, &request).await.unwrap();
|
||||
let result = ingester_response_to_record_batches(stream).await;
|
||||
let result = prepare_data_to_querier(scenario, &request)
|
||||
.await
|
||||
.unwrap()
|
||||
.into_record_batches()
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &result);
|
||||
}
|
||||
|
||||
|
|
@ -523,8 +577,11 @@ mod tests {
|
|||
];
|
||||
for (loc, scenario) in &scenarios {
|
||||
println!("Location: {loc:?}");
|
||||
let stream = prepare_data_to_querier(scenario, &request).await.unwrap();
|
||||
let result = ingester_response_to_record_batches(stream).await;
|
||||
let result = prepare_data_to_querier(scenario, &request)
|
||||
.await
|
||||
.unwrap()
|
||||
.into_record_batches()
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &result);
|
||||
}
|
||||
|
||||
|
|
@ -565,8 +622,11 @@ mod tests {
|
|||
];
|
||||
for (loc, scenario) in &scenarios {
|
||||
println!("Location: {loc:?}");
|
||||
let stream = prepare_data_to_querier(scenario, &request).await.unwrap();
|
||||
let result = ingester_response_to_record_batches(stream).await;
|
||||
let result = prepare_data_to_querier(scenario, &request)
|
||||
.await
|
||||
.unwrap()
|
||||
.into_record_batches()
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &result);
|
||||
}
|
||||
|
||||
|
|
@ -640,60 +700,4 @@ mod tests {
|
|||
fn lp_to_batch(lp: &str) -> RecordBatch {
|
||||
lp_to_mutable_batch(lp).1.to_arrow(Selection::All).unwrap()
|
||||
}
|
||||
|
||||
/// Convert [`IngesterQueryResponse`] to a set of [`RecordBatch`]es.
|
||||
///
|
||||
/// If the response contains multiple snapshots, this will merge the schemas into a single one and create
|
||||
/// NULL-columns for snapshots that miss columns. This makes it easier to use the resulting batches with
|
||||
/// [`assert_batches_sorted_eq`].
|
||||
///
|
||||
/// # Panic
|
||||
/// Panics if there are no batches returned at all. Also panics if the snapshot-scoped schemas do not line up with
|
||||
/// the snapshot-scoped record batches.
|
||||
async fn ingester_response_to_record_batches(
|
||||
response: IngesterQueryResponse,
|
||||
) -> Vec<RecordBatch> {
|
||||
let mut snapshot_schema = None;
|
||||
let mut schema_merger = SchemaMerger::new();
|
||||
let mut batches = vec![];
|
||||
|
||||
let mut stream = response.flatten();
|
||||
while let Some(msg) = stream.try_next().await.unwrap() {
|
||||
match msg {
|
||||
FlatIngesterQueryResponse::StartPartition { .. } => (),
|
||||
FlatIngesterQueryResponse::RecordBatch { batch } => {
|
||||
let last_schema = snapshot_schema.as_ref().unwrap();
|
||||
assert_eq!(&batch.schema(), last_schema);
|
||||
batches.push(batch);
|
||||
}
|
||||
FlatIngesterQueryResponse::StartSnapshot { schema } => {
|
||||
snapshot_schema = Some(Arc::clone(&schema));
|
||||
|
||||
schema_merger = schema_merger
|
||||
.merge(&schema::Schema::try_from(schema).unwrap())
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(!batches.is_empty());
|
||||
|
||||
// equalize schemas
|
||||
let common_schema = schema_merger.build().as_arrow();
|
||||
batches
|
||||
.into_iter()
|
||||
.map(|batch| {
|
||||
let batch_schema = batch.schema();
|
||||
let columns = common_schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| match batch_schema.index_of(field.name()) {
|
||||
Ok(idx) => Arc::clone(batch.column(idx)),
|
||||
Err(_) => new_null_array(field.data_type(), batch.num_rows()),
|
||||
})
|
||||
.collect();
|
||||
RecordBatch::try_new(Arc::clone(&common_schema), columns).unwrap()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -279,7 +279,8 @@ where
|
|||
shard_index=%self.shard_index,
|
||||
shard_id=%self.shard_id,
|
||||
potential_data_loss=true,
|
||||
"reset stream"
|
||||
"unable to read from desired sequence number offset \
|
||||
- reset stream to oldest available data"
|
||||
);
|
||||
self.shard_reset_count.inc(1);
|
||||
sequence_number_before_reset = Some(self.current_sequence_number);
|
||||
|
|
@ -293,7 +294,8 @@ where
|
|||
shard_index=%self.shard_index,
|
||||
shard_id=%self.shard_id,
|
||||
potential_data_loss=true,
|
||||
"unable to read from desired sequence number offset"
|
||||
"unable to read from desired sequence number offset \
|
||||
- aborting ingest due to configuration"
|
||||
);
|
||||
self.shard_unknown_sequence_number_count.inc(1);
|
||||
None
|
||||
|
|
|
|||
|
|
@ -0,0 +1,350 @@
|
|||
use std::{collections::HashMap, num::NonZeroU32, sync::Arc, time::Duration};
|
||||
|
||||
use data_types::{
|
||||
Namespace, NamespaceSchema, PartitionKey, QueryPoolId, Sequence, SequenceNumber, ShardId,
|
||||
ShardIndex, TopicId,
|
||||
};
|
||||
use dml::{DmlMeta, DmlWrite};
|
||||
use generated_types::ingester::IngesterQueryRequest;
|
||||
use ingester::{
|
||||
handler::{IngestHandler, IngestHandlerImpl},
|
||||
lifecycle::LifecycleConfig,
|
||||
querier_handler::IngesterQueryResponse,
|
||||
};
|
||||
use iox_catalog::{interface::Catalog, mem::MemCatalog, validate_or_insert_schema};
|
||||
use iox_query::exec::Executor;
|
||||
use iox_time::TimeProvider;
|
||||
use metric::{Attributes, Metric, MetricObserver};
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use object_store::DynObjectStore;
|
||||
use observability_deps::tracing::*;
|
||||
use test_helpers::{maybe_start_logging, timeout::FutureTimeout};
|
||||
use write_buffer::{
|
||||
core::WriteBufferReading,
|
||||
mock::{MockBufferForReading, MockBufferSharedState},
|
||||
};
|
||||
use write_summary::ShardProgress;
|
||||
|
||||
/// The byte size of 1 MiB.
|
||||
const ONE_MIB: usize = 1024 * 1024;
|
||||
|
||||
/// The shard index used for the [`TestContext`].
|
||||
pub const TEST_SHARD_INDEX: ShardIndex = ShardIndex::new(0);
|
||||
|
||||
/// The topic name used for tests.
|
||||
pub const TEST_TOPIC_NAME: &str = "banana-topics";
|
||||
|
||||
/// The lifecycle configuration used for tests.
|
||||
pub const TEST_LIFECYCLE_CONFIG: LifecycleConfig = LifecycleConfig::new(
|
||||
ONE_MIB,
|
||||
ONE_MIB / 10,
|
||||
ONE_MIB / 10,
|
||||
Duration::from_secs(10),
|
||||
Duration::from_secs(10),
|
||||
1_000,
|
||||
);
|
||||
|
||||
pub struct TestContext {
|
||||
ingester: IngestHandlerImpl,
|
||||
|
||||
// Catalog data initialised at construction time for later reuse.
|
||||
query_id: QueryPoolId,
|
||||
topic_id: TopicId,
|
||||
shard_id: ShardId,
|
||||
|
||||
// A map of namespaces to schemas, also serving as the set of known
|
||||
// namespaces.
|
||||
namespaces: HashMap<String, NamespaceSchema>,
|
||||
|
||||
catalog: Arc<dyn Catalog>,
|
||||
object_store: Arc<DynObjectStore>,
|
||||
write_buffer_state: MockBufferSharedState,
|
||||
metrics: Arc<metric::Registry>,
|
||||
}
|
||||
|
||||
impl TestContext {
|
||||
pub async fn new() -> Self {
|
||||
maybe_start_logging();
|
||||
|
||||
let metrics: Arc<metric::Registry> = Default::default();
|
||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
||||
|
||||
// Initialise a topic, query pool and shard.
|
||||
//
|
||||
// Note that tests should set up their own namespace via
|
||||
// ensure_namespace()
|
||||
let mut txn = catalog.start_transaction().await.unwrap();
|
||||
let topic = txn.topics().create_or_get(TEST_TOPIC_NAME).await.unwrap();
|
||||
let query_id = txn
|
||||
.query_pools()
|
||||
.create_or_get("banana-query-pool")
|
||||
.await
|
||||
.unwrap()
|
||||
.id;
|
||||
let shard = txn
|
||||
.shards()
|
||||
.create_or_get(&topic, TEST_SHARD_INDEX)
|
||||
.await
|
||||
.unwrap();
|
||||
txn.commit().await.unwrap();
|
||||
|
||||
// Mock in-memory write buffer.
|
||||
let write_buffer_state =
|
||||
MockBufferSharedState::empty_with_n_shards(NonZeroU32::try_from(1).unwrap());
|
||||
let write_buffer_read: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(write_buffer_state.clone(), None).unwrap());
|
||||
|
||||
// Mock object store that persists in memory.
|
||||
let object_store: Arc<DynObjectStore> = Arc::new(object_store::memory::InMemory::new());
|
||||
|
||||
let ingester = IngestHandlerImpl::new(
|
||||
TEST_LIFECYCLE_CONFIG,
|
||||
topic.clone(),
|
||||
[(TEST_SHARD_INDEX, shard)].into_iter().collect(),
|
||||
Arc::clone(&catalog),
|
||||
Arc::clone(&object_store),
|
||||
write_buffer_read,
|
||||
Arc::new(Executor::new(1)),
|
||||
Arc::clone(&metrics),
|
||||
true,
|
||||
1,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Self {
|
||||
ingester,
|
||||
query_id,
|
||||
topic_id: topic.id,
|
||||
shard_id: shard.id,
|
||||
catalog,
|
||||
object_store,
|
||||
write_buffer_state,
|
||||
metrics,
|
||||
namespaces: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Restart the Ingester, driving initialisation again.
|
||||
///
|
||||
/// NOTE: metric contents are not reset.
|
||||
pub async fn restart(&mut self) {
|
||||
info!("restarting test context ingester");
|
||||
|
||||
let write_buffer_read: Arc<dyn WriteBufferReading> =
|
||||
Arc::new(MockBufferForReading::new(self.write_buffer_state.clone(), None).unwrap());
|
||||
|
||||
let topic = self
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.topics()
|
||||
.create_or_get(TEST_TOPIC_NAME)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let shard = self
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.shards()
|
||||
.create_or_get(&topic, TEST_SHARD_INDEX)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
self.ingester = IngestHandlerImpl::new(
|
||||
TEST_LIFECYCLE_CONFIG,
|
||||
topic,
|
||||
[(TEST_SHARD_INDEX, shard)].into_iter().collect(),
|
||||
Arc::clone(&self.catalog),
|
||||
Arc::clone(&self.object_store),
|
||||
write_buffer_read,
|
||||
Arc::new(Executor::new(1)),
|
||||
Arc::clone(&self.metrics),
|
||||
true,
|
||||
1,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Create a namespace in the catalog for the ingester to discover.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Must not be called twice with the same `name`.
|
||||
#[track_caller]
|
||||
pub async fn ensure_namespace(&mut self, name: &str) -> Namespace {
|
||||
let ns = self
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.namespaces()
|
||||
.create(
|
||||
name,
|
||||
iox_catalog::INFINITE_RETENTION_POLICY,
|
||||
self.topic_id,
|
||||
self.query_id,
|
||||
)
|
||||
.await
|
||||
.expect("failed to create test namespace");
|
||||
|
||||
assert!(
|
||||
self.namespaces
|
||||
.insert(
|
||||
name.to_owned(),
|
||||
NamespaceSchema::new(
|
||||
ns.id,
|
||||
self.topic_id,
|
||||
self.query_id,
|
||||
iox_catalog::DEFAULT_MAX_COLUMNS_PER_TABLE,
|
||||
),
|
||||
)
|
||||
.is_none(),
|
||||
"namespace must not be duplicated"
|
||||
);
|
||||
|
||||
debug!(?ns, "test namespace created");
|
||||
|
||||
ns
|
||||
}
|
||||
|
||||
/// Enqueue the specified `op` into the write buffer for the ingester to
|
||||
/// consume.
|
||||
///
|
||||
/// This call takes care of validating the schema of `op` and populating the
|
||||
/// catalog with any new schema elements.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This method panics if the namespace for `op` does not exist, or the
|
||||
/// schema is invalid or conflicts with the existing namespace schema.
|
||||
#[track_caller]
|
||||
pub async fn enqueue_write(&mut self, op: DmlWrite) -> SequenceNumber {
|
||||
let schema = self
|
||||
.namespaces
|
||||
.get_mut(op.namespace())
|
||||
.expect("namespace does not exist");
|
||||
|
||||
// Pull the sequence number out of the op to return it back to the user
|
||||
// for simplicity.
|
||||
let offset = op
|
||||
.meta()
|
||||
.sequence()
|
||||
.expect("write must be sequenced")
|
||||
.sequence_number;
|
||||
|
||||
// Perform schema validation, populating the catalog.
|
||||
let mut repo = self.catalog.repositories().await;
|
||||
if let Some(new) = validate_or_insert_schema(op.tables(), schema, repo.as_mut())
|
||||
.await
|
||||
.expect("failed schema validation for enqueuing write")
|
||||
{
|
||||
// Retain the updated schema.
|
||||
debug!(?schema, "updated test context schema");
|
||||
*schema = new;
|
||||
}
|
||||
|
||||
// Push the write into the write buffer.
|
||||
self.write_buffer_state.push_write(op);
|
||||
|
||||
debug!(?offset, "enqueued write in write buffer");
|
||||
offset
|
||||
}
|
||||
|
||||
/// A helper wrapper over [`Self::enqueue_write()`] for line-protocol.
|
||||
#[track_caller]
|
||||
pub async fn write_lp(
|
||||
&mut self,
|
||||
namespace: &str,
|
||||
lp: &str,
|
||||
partition_key: PartitionKey,
|
||||
sequence_number: i64,
|
||||
) -> SequenceNumber {
|
||||
self.enqueue_write(DmlWrite::new(
|
||||
namespace,
|
||||
lines_to_batches(lp, 0).unwrap(),
|
||||
Some(partition_key),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(TEST_SHARD_INDEX, SequenceNumber::new(sequence_number)),
|
||||
iox_time::SystemProvider::new().now(),
|
||||
None,
|
||||
50,
|
||||
),
|
||||
))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Utilise the progress API to query for the current state of the test
|
||||
/// shard.
|
||||
pub async fn progress(&self) -> ShardProgress {
|
||||
self.ingester
|
||||
.progresses(vec![TEST_SHARD_INDEX])
|
||||
.await
|
||||
.get(&TEST_SHARD_INDEX)
|
||||
.unwrap()
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Wait for the specified `offset` to be readable according to the external
|
||||
/// progress API.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This method panics if `offset` is not readable within 10 seconds.
|
||||
pub async fn wait_for_readable(&self, offset: SequenceNumber) {
|
||||
async {
|
||||
loop {
|
||||
let is_readable = self.progress().await.readable(offset);
|
||||
if is_readable {
|
||||
debug!(?offset, "offset reported as readable");
|
||||
return;
|
||||
}
|
||||
|
||||
trace!(?offset, "offset reported as not yet readable");
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
.with_timeout_panic(Duration::from_secs(10))
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Submit a query to the ingester's public query interface.
///
/// Delegates directly to the underlying ingester's query handler, returning
/// its response or error unchanged.
pub async fn query(
    &self,
    req: IngesterQueryRequest,
) -> Result<IngesterQueryResponse, ingester::querier_handler::Error> {
    self.ingester.query(req).await
}
|
||||
|
||||
/// Retrieve the specified metric value.
|
||||
pub fn get_metric<T, A>(&self, name: &'static str, attrs: A) -> T::Recorder
|
||||
where
|
||||
T: MetricObserver,
|
||||
A: Into<Attributes>,
|
||||
{
|
||||
let attrs = attrs.into();
|
||||
|
||||
self.metrics
|
||||
.get_instrument::<Metric<T>>(name)
|
||||
.unwrap_or_else(|| panic!("failed to find metric {}", name))
|
||||
.get_observer(&attrs)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"failed to find metric {} with attributes {:?}",
|
||||
name, &attrs
|
||||
)
|
||||
})
|
||||
.recorder()
|
||||
}
|
||||
|
||||
/// Return a reference to the catalog backing this test context.
pub fn catalog(&self) -> &dyn Catalog {
    self.catalog.as_ref()
}
|
||||
|
||||
/// Return the [`ShardId`] of the test shard this context writes to.
pub fn shard_id(&self) -> ShardId {
    self.shard_id
}
|
||||
}
|
||||
|
|
@ -0,0 +1,421 @@
|
|||
mod common;
|
||||
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
use assert_matches::assert_matches;
|
||||
pub use common::*;
|
||||
use data_types::{Partition, PartitionKey, SequenceNumber};
|
||||
use generated_types::ingester::IngesterQueryRequest;
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use metric::{DurationHistogram, U64Counter, U64Gauge};
|
||||
|
||||
// Write data to an ingester through the write buffer interface, utilise the
// progress API to wait for it to become readable, and finally query the data
// and validate the contents.
//
// Also asserts the ingest-path metrics (op apply duration, bytes read,
// last/lag sequence numbers, last ingest timestamp).
#[tokio::test]
async fn test_write_query() {
    let mut ctx = TestContext::new().await;

    ctx.ensure_namespace("test_namespace").await;

    // Initial write
    let partition_key = PartitionKey::from("1970-01-01");
    ctx.write_lp(
        "test_namespace",
        "bananas greatness=\"unbounded\" 10",
        partition_key.clone(),
        0,
    )
    .await;

    // A subsequent write with a non-contiguous sequence number to a different table.
    ctx.write_lp(
        "test_namespace",
        "cpu bar=2 20\ncpu bar=3 30",
        partition_key.clone(),
        7,
    )
    .await;

    // And a third write that appends more data to the table in the initial
    // write.
    let offset = ctx
        .write_lp(
            "test_namespace",
            "bananas count=42 200",
            partition_key.clone(),
            42,
        )
        .await;

    // Block until the last (highest-offset) write is readable; the earlier
    // writes are then readable too.
    ctx.wait_for_readable(offset).await;

    // Perform a query to validate the actual data buffered.
    let data = ctx
        .query(IngesterQueryRequest {
            namespace: "test_namespace".to_string(),
            table: "bananas".to_string(),
            columns: vec![],
            predicate: None,
        })
        .await
        .expect("query should succeed")
        .into_record_batches()
        .await;

    // NOTE(review): cell padding reconstructed from the border widths — the
    // extraction collapsed runs of spaces inside these literals; verify
    // against the committed file.
    let expected = vec![
        "+-------+-----------+--------------------------------+",
        "| count | greatness | time                           |",
        "+-------+-----------+--------------------------------+",
        "|       | unbounded | 1970-01-01T00:00:00.000000010Z |",
        "| 42    |           | 1970-01-01T00:00:00.000000200Z |",
        "+-------+-----------+--------------------------------+",
    ];
    assert_batches_sorted_eq!(&expected, &data);

    // Assert various ingest metrics.

    // Three ops were applied (one per write above).
    let hist = ctx
        .get_metric::<DurationHistogram, _>(
            "ingester_op_apply_duration",
            &[
                ("kafka_topic", TEST_TOPIC_NAME),
                ("kafka_partition", "0"),
                ("result", "success"),
            ],
        )
        .fetch();
    assert_eq!(hist.sample_count(), 3);

    // Each write is enqueued with a size of 50 bytes; 3 writes => 150.
    let metric = ctx
        .get_metric::<U64Counter, _>(
            "ingester_write_buffer_read_bytes",
            &[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
        )
        .fetch();
    assert_eq!(metric, 150);

    // The last write was sequenced at offset 42.
    let metric = ctx
        .get_metric::<U64Gauge, _>(
            "ingester_write_buffer_last_sequence_number",
            &[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
        )
        .fetch();
    assert_eq!(metric, 42);

    // Fully caught up => no lag.
    let metric = ctx
        .get_metric::<U64Gauge, _>(
            "ingester_write_buffer_sequence_number_lag",
            &[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
        )
        .fetch();
    assert_eq!(metric, 0);

    // The last ingest timestamp must be in the past.
    let metric = ctx
        .get_metric::<U64Gauge, _>(
            "ingester_write_buffer_last_ingest_ts",
            &[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
        )
        .fetch();
    let now = SystemProvider::new().now();
    assert!(metric < now.timestamp_nanos() as _);
}
|
||||
|
||||
// Ensure an ingester correctly seeks to the offset stored in the catalog at
// startup, skipping any empty offsets.
#[tokio::test]
async fn test_seek_on_init() {
    let mut ctx = TestContext::new().await;

    // Place some writes into the write buffer.

    let partition_key = PartitionKey::from("1970-01-01");

    ctx.ensure_namespace("test_namespace").await;
    ctx.write_lp(
        "test_namespace",
        "bananas greatness=\"unbounded\" 10",
        partition_key.clone(),
        0,
    )
    .await;

    // A subsequent write with a non-contiguous sequence number to the same
    // table.
    //
    // Resuming will be configured against an offset in the middle of the two
    // ranges.
    let w2 = ctx
        .write_lp(
            "test_namespace",
            "bananas greatness=\"amazing\",platanos=42 20",
            partition_key.clone(),
            7,
        )
        .await;

    // Wait for the writes to be processed.
    ctx.wait_for_readable(w2).await;

    // Assert the data in memory.
    let data = ctx
        .query(IngesterQueryRequest {
            namespace: "test_namespace".to_string(),
            table: "bananas".to_string(),
            columns: vec![],
            predicate: None,
        })
        .await
        .expect("query should succeed")
        .into_record_batches()
        .await;

    // NOTE(review): cell padding reconstructed from the border widths — the
    // extraction collapsed runs of spaces inside these literals; verify
    // against the committed file.
    let expected = vec![
        "+-----------+----------+--------------------------------+",
        "| greatness | platanos | time                           |",
        "+-----------+----------+--------------------------------+",
        "| amazing   | 42       | 1970-01-01T00:00:00.000000020Z |",
        "| unbounded |          | 1970-01-01T00:00:00.000000010Z |",
        "+-----------+----------+--------------------------------+",
    ];
    assert_batches_sorted_eq!(&expected, &data);

    // Update the catalog state, causing the next boot of the ingester to seek
    // past the first write (offset 0), but before the second write (offset 7).
    ctx.catalog()
        .repositories()
        .await
        .shards()
        .update_min_unpersisted_sequence_number(ctx.shard_id(), SequenceNumber::new(3))
        .await
        .expect("failed to update persisted marker");

    // Restart the ingester.
    ctx.restart().await;

    // Wait for the second write to become readable again.
    ctx.wait_for_readable(w2).await;

    // Assert the data in memory now contains only w2.
    let data = ctx
        .query(IngesterQueryRequest {
            namespace: "test_namespace".to_string(),
            table: "bananas".to_string(),
            columns: vec![],
            predicate: None,
        })
        .await
        .expect("query should succeed")
        .into_record_batches()
        .await;

    let expected = vec![
        "+-----------+----------+--------------------------------+",
        "| greatness | platanos | time                           |",
        "+-----------+----------+--------------------------------+",
        "| amazing   | 42       | 1970-01-01T00:00:00.000000020Z |",
        "+-----------+----------+--------------------------------+",
    ];
    assert_batches_sorted_eq!(&expected, &data);
}
|
||||
|
||||
// Ensure an ingester respects the per-partition persist watermark, skipping
// already applied ops.
#[tokio::test]
async fn test_skip_previously_applied_partition_ops() {
    let mut ctx = TestContext::new().await;

    // Place some writes into the write buffer.
    let ns = ctx.ensure_namespace("test_namespace").await;
    let partition_key = PartitionKey::from("1970-01-01");
    ctx.write_lp(
        "test_namespace",
        "bananas greatness=\"unbounded\" 10",
        partition_key.clone(),
        5,
    )
    .await;
    let w2 = ctx
        .write_lp(
            "test_namespace",
            "bananas greatness=\"amazing\",platanos=42 20",
            partition_key.clone(),
            10,
        )
        .await;

    // Wait for the writes to be processed.
    ctx.wait_for_readable(w2).await;

    // Assert the data in memory.
    let data = ctx
        .query(IngesterQueryRequest {
            namespace: "test_namespace".to_string(),
            table: "bananas".to_string(),
            columns: vec![],
            predicate: None,
        })
        .await
        .expect("query should succeed")
        .into_record_batches()
        .await;

    // NOTE(review): cell padding reconstructed from the border widths — the
    // extraction collapsed runs of spaces inside these literals; verify
    // against the committed file.
    let expected = vec![
        "+-----------+----------+--------------------------------+",
        "| greatness | platanos | time                           |",
        "+-----------+----------+--------------------------------+",
        "| amazing   | 42       | 1970-01-01T00:00:00.000000020Z |",
        "| unbounded |          | 1970-01-01T00:00:00.000000010Z |",
        "+-----------+----------+--------------------------------+",
    ];
    assert_batches_sorted_eq!(&expected, &data);

    // Read the partition ID of the writes above (both writes share the same
    // partition key, so exactly one partition exists).
    let partitions = ctx
        .catalog()
        .repositories()
        .await
        .partitions()
        .list_by_namespace(ns.id)
        .await
        .unwrap();
    assert_matches!(&*partitions, &[Partition { .. }]);

    // And set the per-partition persist marker after the first write
    // (offset 5), but before the second (offset 10).
    ctx.catalog()
        .repositories()
        .await
        .partitions()
        .update_persisted_sequence_number(partitions[0].id, SequenceNumber::new(6))
        .await
        .expect("failed to update persisted marker");

    // Restart the ingester, which shall seek to the shard offset of 0, and
    // begin replaying ops.
    ctx.restart().await;

    // Wait for the second write to become readable again.
    ctx.wait_for_readable(w2).await;

    // Assert the partition replay skipped the first write.
    let data = ctx
        .query(IngesterQueryRequest {
            namespace: "test_namespace".to_string(),
            table: "bananas".to_string(),
            columns: vec![],
            predicate: None,
        })
        .await
        .expect("query should succeed")
        .into_record_batches()
        .await;

    let expected = vec![
        "+-----------+----------+--------------------------------+",
        "| greatness | platanos | time                           |",
        "+-----------+----------+--------------------------------+",
        "| amazing   | 42       | 1970-01-01T00:00:00.000000020Z |",
        "+-----------+----------+--------------------------------+",
    ];
    assert_batches_sorted_eq!(&expected, &data);
}
|
||||
|
||||
// Ensure a seek beyond the actual data available (i.e. into the future) causes
// a panic to bring about a human response.
#[tokio::test]
#[should_panic = "attempted to seek to offset 42, but current high watermark for partition 0 is 0"]
async fn test_seek_beyond_available_data() {
    let mut ctx = TestContext::new().await;

    // Place a write into the write buffer so it is not empty.
    ctx.ensure_namespace("test_namespace").await;
    ctx.write_lp(
        "test_namespace",
        "bananas greatness=\"unbounded\" 10",
        PartitionKey::from("1970-01-01"),
        0,
    )
    .await;

    // Update the catalog state, causing the next boot of the ingester to seek
    // past the write, beyond valid data offsets.
    ctx.catalog()
        .repositories()
        .await
        .shards()
        .update_min_unpersisted_sequence_number(ctx.shard_id(), SequenceNumber::new(42))
        .await
        .expect("failed to update persisted marker");

    // Restart the ingester; the seek during init must panic (asserted by the
    // `should_panic` attribute above).
    ctx.restart().await;
}
|
||||
|
||||
// Ensure an ingester configured to resume from offset 1 correctly seeks to the
// oldest available data when that offset no longer exists.
#[tokio::test]
async fn test_seek_dropped_offset() {
    let mut ctx = TestContext::new().await;

    // Place a write into the write buffer so it is not empty.
    ctx.ensure_namespace("test_namespace").await;

    // A write at offset 42
    let w1 = ctx
        .write_lp(
            "test_namespace",
            "bananas greatness=\"unbounded\" 10",
            PartitionKey::from("1970-01-01"),
            42,
        )
        .await;

    // Configure the ingester to seek to offset 1, which does not exist.
    ctx.catalog()
        .repositories()
        .await
        .shards()
        .update_min_unpersisted_sequence_number(ctx.shard_id(), SequenceNumber::new(1))
        .await
        .expect("failed to update persisted marker");

    // Restart the ingester.
    ctx.restart().await;

    // Wait for the op to be applied
    ctx.wait_for_readable(w1).await;

    // Assert the data in memory now contains w1 (the ingester reset to the
    // oldest available offset rather than failing on the missing offset 1).
    let data = ctx
        .query(IngesterQueryRequest {
            namespace: "test_namespace".to_string(),
            table: "bananas".to_string(),
            columns: vec![],
            predicate: None,
        })
        .await
        .expect("query should succeed")
        .into_record_batches()
        .await;

    // NOTE(review): cell padding reconstructed from the border widths — the
    // extraction collapsed runs of spaces inside these literals; verify
    // against the committed file.
    let expected = vec![
        "+-----------+--------------------------------+",
        "| greatness | time                           |",
        "+-----------+--------------------------------+",
        "| unbounded | 1970-01-01T00:00:00.000000010Z |",
        "+-----------+--------------------------------+",
    ];
    assert_batches_sorted_eq!(&expected, &data);

    // Ensure the metric was set to cause an alert for potential data loss.
    let metric = ctx
        .get_metric::<U64Counter, _>(
            "shard_reset_count",
            &[
                ("kafka_topic", TEST_TOPIC_NAME),
                ("kafka_partition", "0"),
                ("potential_data_loss", "true"),
            ],
        )
        .fetch();
    assert!(metric > 0);
}
|
||||
Loading…
Reference in New Issue