test: write, query & progress API coverage
This commit adds a new test that exercises all major external APIs of the ingester: * Writing data via the write buffer * Waiting for data to be readable via the progress API * Querying data and and asserting the contents This should provide basic integration coverage for the Ingester internals. This commit also removes a similar test (though with less coverage) that was tightly coupled to the existing buffering structures.pull/24376/head
parent
b12d472a17
commit
7729494f61
|
@ -435,11 +435,10 @@ impl<T> Drop for IngestHandlerImpl<T> {
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::{num::NonZeroU32, ops::DerefMut};
|
use std::{num::NonZeroU32, ops::DerefMut};
|
||||||
|
|
||||||
use data_types::{Namespace, NamespaceSchema, QueryPool, Sequence, SequenceNumber};
|
use data_types::{Namespace, NamespaceSchema, Sequence, SequenceNumber};
|
||||||
use dml::{DmlMeta, DmlWrite};
|
use dml::{DmlMeta, DmlWrite};
|
||||||
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
|
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
|
||||||
use iox_time::Time;
|
use iox_time::Time;
|
||||||
use metric::{Attributes, Metric, U64Counter, U64Gauge};
|
|
||||||
use mutable_batch_lp::lines_to_batches;
|
use mutable_batch_lp::lines_to_batches;
|
||||||
use object_store::memory::InMemory;
|
use object_store::memory::InMemory;
|
||||||
use test_helpers::maybe_start_logging;
|
use test_helpers::maybe_start_logging;
|
||||||
|
@ -448,179 +447,9 @@ mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::data::{partition::SnapshotBatch, table::TableName};
|
use crate::data::{partition::SnapshotBatch, table::TableName};
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn read_from_write_buffer_write_to_mutable_buffer() {
|
|
||||||
let ingester = TestIngester::new().await;
|
|
||||||
|
|
||||||
let schema = NamespaceSchema::new(
|
|
||||||
ingester.namespace.id,
|
|
||||||
ingester.topic.id,
|
|
||||||
ingester.query_pool.id,
|
|
||||||
100,
|
|
||||||
);
|
|
||||||
let mut txn = ingester.catalog.start_transaction().await.unwrap();
|
|
||||||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
|
||||||
let ingest_ts2 = Time::from_timestamp_millis(1337);
|
|
||||||
let w1 = DmlWrite::new(
|
|
||||||
"foo",
|
|
||||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
|
||||||
Some("1970-01-01".into()),
|
|
||||||
DmlMeta::sequenced(
|
|
||||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(0)),
|
|
||||||
ingest_ts1,
|
|
||||||
None,
|
|
||||||
50,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
let schema = validate_or_insert_schema(w1.tables(), &schema, txn.deref_mut())
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.unwrap();
|
|
||||||
ingester.write_buffer_state.push_write(w1);
|
|
||||||
let w2 = DmlWrite::new(
|
|
||||||
"foo",
|
|
||||||
lines_to_batches("cpu bar=2 20\ncpu bar=3 30", 0).unwrap(),
|
|
||||||
Some("1970-01-01".into()),
|
|
||||||
DmlMeta::sequenced(
|
|
||||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(7)),
|
|
||||||
ingest_ts2,
|
|
||||||
None,
|
|
||||||
150,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
let _schema = validate_or_insert_schema(w2.tables(), &schema, txn.deref_mut())
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.unwrap();
|
|
||||||
ingester.write_buffer_state.push_write(w2);
|
|
||||||
let w3 = DmlWrite::new(
|
|
||||||
"foo",
|
|
||||||
lines_to_batches("a b=2 200", 0).unwrap(),
|
|
||||||
Some("1970-01-01".into()),
|
|
||||||
DmlMeta::sequenced(
|
|
||||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(9)),
|
|
||||||
ingest_ts2,
|
|
||||||
None,
|
|
||||||
150,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
let _schema = validate_or_insert_schema(w3.tables(), &schema, txn.deref_mut())
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.unwrap();
|
|
||||||
ingester.write_buffer_state.push_write(w3);
|
|
||||||
txn.commit().await.unwrap();
|
|
||||||
|
|
||||||
// give the writes some time to go through the buffer. Exit once we've verified there's
|
|
||||||
// data in there from both writes.
|
|
||||||
tokio::time::timeout(Duration::from_secs(2), async {
|
|
||||||
let ns_name = ingester.namespace.name.into();
|
|
||||||
let table_name = TableName::from("a");
|
|
||||||
loop {
|
|
||||||
let mut has_measurement = false;
|
|
||||||
|
|
||||||
if let Some(data) = ingester.ingester.data.shard(ingester.shard.id) {
|
|
||||||
if let Some(data) = data.namespace(&ns_name) {
|
|
||||||
// verify there's data in the buffer
|
|
||||||
if let Some((b, _)) = data.snapshot(&table_name, &"1970-01-01".into()).await
|
|
||||||
{
|
|
||||||
if let Some(b) = b.first() {
|
|
||||||
if b.data.num_rows() > 0 {
|
|
||||||
has_measurement = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// and ensure that the shard state was actually updated
|
|
||||||
let shard = ingester
|
|
||||||
.catalog
|
|
||||||
.repositories()
|
|
||||||
.await
|
|
||||||
.shards()
|
|
||||||
.create_or_get(&ingester.topic, ingester.shard_index)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
if has_measurement
|
|
||||||
&& shard.min_unpersisted_sequence_number == SequenceNumber::new(9)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
.expect("timeout");
|
|
||||||
|
|
||||||
let observation = ingester
|
|
||||||
.metrics
|
|
||||||
.get_instrument::<Metric<DurationHistogram>>("ingester_op_apply_duration")
|
|
||||||
.unwrap()
|
|
||||||
.get_observer(&Attributes::from(&[
|
|
||||||
("kafka_topic", "whatevs"),
|
|
||||||
("kafka_partition", "0"),
|
|
||||||
("result", "success"),
|
|
||||||
]))
|
|
||||||
.unwrap()
|
|
||||||
.fetch();
|
|
||||||
let hits = observation.buckets.iter().map(|b| b.count).sum::<u64>();
|
|
||||||
assert_eq!(hits, 3);
|
|
||||||
|
|
||||||
let observation = ingester
|
|
||||||
.metrics
|
|
||||||
.get_instrument::<Metric<U64Counter>>("ingester_write_buffer_read_bytes")
|
|
||||||
.unwrap()
|
|
||||||
.get_observer(&Attributes::from(&[
|
|
||||||
("kafka_topic", "whatevs"),
|
|
||||||
("kafka_partition", "0"),
|
|
||||||
]))
|
|
||||||
.unwrap()
|
|
||||||
.fetch();
|
|
||||||
assert_eq!(observation, 350);
|
|
||||||
|
|
||||||
let observation = ingester
|
|
||||||
.metrics
|
|
||||||
.get_instrument::<Metric<U64Gauge>>("ingester_write_buffer_last_sequence_number")
|
|
||||||
.unwrap()
|
|
||||||
.get_observer(&Attributes::from(&[
|
|
||||||
("kafka_topic", "whatevs"),
|
|
||||||
("kafka_partition", "0"),
|
|
||||||
]))
|
|
||||||
.unwrap()
|
|
||||||
.fetch();
|
|
||||||
assert_eq!(observation, 9);
|
|
||||||
|
|
||||||
let observation = ingester
|
|
||||||
.metrics
|
|
||||||
.get_instrument::<Metric<U64Gauge>>("ingester_write_buffer_sequence_number_lag")
|
|
||||||
.unwrap()
|
|
||||||
.get_observer(&Attributes::from(&[
|
|
||||||
("kafka_topic", "whatevs"),
|
|
||||||
("kafka_partition", "0"),
|
|
||||||
]))
|
|
||||||
.unwrap()
|
|
||||||
.fetch();
|
|
||||||
assert_eq!(observation, 0);
|
|
||||||
|
|
||||||
let observation = ingester
|
|
||||||
.metrics
|
|
||||||
.get_instrument::<Metric<U64Gauge>>("ingester_write_buffer_last_ingest_ts")
|
|
||||||
.unwrap()
|
|
||||||
.get_observer(&Attributes::from(&[
|
|
||||||
("kafka_topic", "whatevs"),
|
|
||||||
("kafka_partition", "0"),
|
|
||||||
]))
|
|
||||||
.unwrap()
|
|
||||||
.fetch();
|
|
||||||
assert_eq!(observation, ingest_ts2.timestamp_nanos() as u64);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_shutdown() {
|
async fn test_shutdown() {
|
||||||
let ingester = TestIngester::new().await.ingester;
|
let (ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||||
|
|
||||||
// does not exit w/o shutdown
|
// does not exit w/o shutdown
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
|
@ -638,7 +467,7 @@ mod tests {
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
#[should_panic(expected = "Background worker 'bad_task' exited early!")]
|
#[should_panic(expected = "Background worker 'bad_task' exited early!")]
|
||||||
async fn test_join_task_early_shutdown() {
|
async fn test_join_task_early_shutdown() {
|
||||||
let mut ingester = TestIngester::new().await.ingester;
|
let (mut ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||||
|
|
||||||
let shutdown_task = tokio::spawn(async {
|
let shutdown_task = tokio::spawn(async {
|
||||||
// It does nothing! and stops.
|
// It does nothing! and stops.
|
||||||
|
@ -655,7 +484,7 @@ mod tests {
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
#[should_panic(expected = "JoinError::Panic")]
|
#[should_panic(expected = "JoinError::Panic")]
|
||||||
async fn test_join_task_panic() {
|
async fn test_join_task_panic() {
|
||||||
let mut ingester = TestIngester::new().await.ingester;
|
let (mut ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||||
|
|
||||||
let shutdown_task = tokio::spawn(async {
|
let shutdown_task = tokio::spawn(async {
|
||||||
panic!("bananas");
|
panic!("bananas");
|
||||||
|
@ -946,7 +775,7 @@ mod tests {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn limits_concurrent_queries() {
|
async fn limits_concurrent_queries() {
|
||||||
let mut ingester = TestIngester::new().await;
|
let (mut ingester, _, _) = ingester_test_setup(vec![], 0, true).await;
|
||||||
let request = IngesterQueryRequest {
|
let request = IngesterQueryRequest {
|
||||||
namespace: "foo".to_string(),
|
namespace: "foo".to_string(),
|
||||||
table: "cpu".to_string(),
|
table: "cpu".to_string(),
|
||||||
|
@ -954,93 +783,14 @@ mod tests {
|
||||||
predicate: None,
|
predicate: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let res = ingester.ingester.query(request.clone()).await.unwrap_err();
|
let res = ingester.query(request.clone()).await.unwrap_err();
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
res,
|
res,
|
||||||
crate::querier_handler::Error::NamespaceNotFound { .. }
|
crate::querier_handler::Error::NamespaceNotFound { .. }
|
||||||
));
|
));
|
||||||
|
|
||||||
ingester.ingester.request_sem = Semaphore::new(0);
|
ingester.request_sem = Semaphore::new(0);
|
||||||
let res = ingester.ingester.query(request).await.unwrap_err();
|
let res = ingester.query(request).await.unwrap_err();
|
||||||
assert!(matches!(res, crate::querier_handler::Error::RequestLimit));
|
assert!(matches!(res, crate::querier_handler::Error::RequestLimit));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct TestIngester {
|
|
||||||
catalog: Arc<dyn Catalog>,
|
|
||||||
shard: Shard,
|
|
||||||
namespace: Namespace,
|
|
||||||
topic: TopicMetadata,
|
|
||||||
shard_index: ShardIndex,
|
|
||||||
query_pool: QueryPool,
|
|
||||||
metrics: Arc<metric::Registry>,
|
|
||||||
write_buffer_state: MockBufferSharedState,
|
|
||||||
ingester: IngestHandlerImpl,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TestIngester {
|
|
||||||
async fn new() -> Self {
|
|
||||||
let metrics: Arc<metric::Registry> = Default::default();
|
|
||||||
let catalog: Arc<dyn Catalog> = Arc::new(MemCatalog::new(Arc::clone(&metrics)));
|
|
||||||
|
|
||||||
let mut txn = catalog.start_transaction().await.unwrap();
|
|
||||||
let topic = txn.topics().create_or_get("whatevs").await.unwrap();
|
|
||||||
let query_pool = txn.query_pools().create_or_get("whatevs").await.unwrap();
|
|
||||||
let shard_index = ShardIndex::new(0);
|
|
||||||
let namespace = txn
|
|
||||||
.namespaces()
|
|
||||||
.create("foo", "inf", topic.id, query_pool.id)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let shard = txn
|
|
||||||
.shards()
|
|
||||||
.create_or_get(&topic, shard_index)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
txn.commit().await.unwrap();
|
|
||||||
|
|
||||||
let mut shard_states = BTreeMap::new();
|
|
||||||
shard_states.insert(shard_index, shard);
|
|
||||||
|
|
||||||
let write_buffer_state =
|
|
||||||
MockBufferSharedState::empty_with_n_shards(NonZeroU32::try_from(1).unwrap());
|
|
||||||
let reading: Arc<dyn WriteBufferReading> =
|
|
||||||
Arc::new(MockBufferForReading::new(write_buffer_state.clone(), None).unwrap());
|
|
||||||
let object_store = Arc::new(InMemory::new());
|
|
||||||
|
|
||||||
let lifecycle_config = LifecycleConfig::new(
|
|
||||||
1000000,
|
|
||||||
1000,
|
|
||||||
1000,
|
|
||||||
Duration::from_secs(10),
|
|
||||||
Duration::from_secs(10),
|
|
||||||
10000000,
|
|
||||||
);
|
|
||||||
let ingester = IngestHandlerImpl::new(
|
|
||||||
lifecycle_config,
|
|
||||||
topic.clone(),
|
|
||||||
shard_states,
|
|
||||||
Arc::clone(&catalog),
|
|
||||||
object_store,
|
|
||||||
reading,
|
|
||||||
Arc::new(Executor::new(1)),
|
|
||||||
Arc::clone(&metrics),
|
|
||||||
false,
|
|
||||||
1,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
Self {
|
|
||||||
catalog,
|
|
||||||
shard,
|
|
||||||
namespace,
|
|
||||||
topic,
|
|
||||||
shard_index,
|
|
||||||
query_pool,
|
|
||||||
metrics,
|
|
||||||
write_buffer_state,
|
|
||||||
ingester,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
mod common;
|
||||||
|
|
||||||
|
use arrow_util::assert_batches_sorted_eq;
|
||||||
|
pub use common::*;
|
||||||
|
use data_types::PartitionKey;
|
||||||
|
use generated_types::ingester::IngesterQueryRequest;
|
||||||
|
use iox_time::{SystemProvider, TimeProvider};
|
||||||
|
use metric::{DurationHistogram, U64Counter, U64Gauge};
|
||||||
|
|
||||||
|
// Write data to an ingester through the write buffer interface, utilise the
|
||||||
|
// progress API to wait for it to become readable, and finally query the data
|
||||||
|
// and validate the contents.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_write_query() {
|
||||||
|
let mut ctx = TestContext::new().await;
|
||||||
|
|
||||||
|
ctx.ensure_namespace("test_namespace").await;
|
||||||
|
|
||||||
|
// Initial write
|
||||||
|
let partition_key = PartitionKey::from("1970-01-01");
|
||||||
|
ctx.write_lp(
|
||||||
|
"test_namespace",
|
||||||
|
"bananas greatness=\"unbounded\" 10",
|
||||||
|
partition_key.clone(),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// A subsequent write with a non-contiguous sequence number to a different table.
|
||||||
|
ctx.write_lp(
|
||||||
|
"test_namespace",
|
||||||
|
"cpu bar=2 20\ncpu bar=3 30",
|
||||||
|
partition_key.clone(),
|
||||||
|
7,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// And a third write that appends more data to the table in the initial
|
||||||
|
// write.
|
||||||
|
let offset = ctx
|
||||||
|
.write_lp(
|
||||||
|
"test_namespace",
|
||||||
|
"bananas count=42 200",
|
||||||
|
partition_key.clone(),
|
||||||
|
42,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
ctx.wait_for_readable(offset).await;
|
||||||
|
|
||||||
|
// Perform a query to validate the actual data buffered.
|
||||||
|
let data = ctx
|
||||||
|
.query(IngesterQueryRequest {
|
||||||
|
namespace: "test_namespace".to_string(),
|
||||||
|
table: "bananas".to_string(),
|
||||||
|
columns: vec![],
|
||||||
|
predicate: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("query should succeed")
|
||||||
|
.into_record_batches()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let expected = vec![
|
||||||
|
"+-------+-----------+--------------------------------+",
|
||||||
|
"| count | greatness | time |",
|
||||||
|
"+-------+-----------+--------------------------------+",
|
||||||
|
"| | unbounded | 1970-01-01T00:00:00.000000010Z |",
|
||||||
|
"| 42 | | 1970-01-01T00:00:00.000000200Z |",
|
||||||
|
"+-------+-----------+--------------------------------+",
|
||||||
|
];
|
||||||
|
assert_batches_sorted_eq!(&expected, &data);
|
||||||
|
|
||||||
|
// Assert various ingest metrics.
|
||||||
|
let hist = ctx
|
||||||
|
.get_metric::<DurationHistogram, _>(
|
||||||
|
"ingester_op_apply_duration",
|
||||||
|
&[
|
||||||
|
("kafka_topic", TEST_TOPIC_NAME),
|
||||||
|
("kafka_partition", "0"),
|
||||||
|
("result", "success"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.fetch();
|
||||||
|
assert_eq!(hist.sample_count(), 3);
|
||||||
|
|
||||||
|
let metric = ctx
|
||||||
|
.get_metric::<U64Counter, _>(
|
||||||
|
"ingester_write_buffer_read_bytes",
|
||||||
|
&[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
|
||||||
|
)
|
||||||
|
.fetch();
|
||||||
|
assert_eq!(metric, 150);
|
||||||
|
|
||||||
|
let metric = ctx
|
||||||
|
.get_metric::<U64Gauge, _>(
|
||||||
|
"ingester_write_buffer_last_sequence_number",
|
||||||
|
&[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
|
||||||
|
)
|
||||||
|
.fetch();
|
||||||
|
assert_eq!(metric, 42);
|
||||||
|
|
||||||
|
let metric = ctx
|
||||||
|
.get_metric::<U64Gauge, _>(
|
||||||
|
"ingester_write_buffer_sequence_number_lag",
|
||||||
|
&[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
|
||||||
|
)
|
||||||
|
.fetch();
|
||||||
|
assert_eq!(metric, 0);
|
||||||
|
|
||||||
|
let metric = ctx
|
||||||
|
.get_metric::<U64Gauge, _>(
|
||||||
|
"ingester_write_buffer_last_ingest_ts",
|
||||||
|
&[("kafka_topic", TEST_TOPIC_NAME), ("kafka_partition", "0")],
|
||||||
|
)
|
||||||
|
.fetch();
|
||||||
|
let now = SystemProvider::new().now();
|
||||||
|
assert!(metric < now.timestamp_nanos() as _);
|
||||||
|
}
|
Loading…
Reference in New Issue