refactor: ref PartitionData by key & ID
Changes the TableData to hold a map of partition key -> PartitionData, and partition ID -> PartitionData simultaneously. This allows for cheap lookups when the caller holds an ID. This commit also manages to internalise the partition map within the TableData - one less pub / peeking! This commit also switches from a BTreeMap to a HashMap as the backing collection, as maintaining key ordering doesn't appear to be necessary.pull/24376/head
parent
0847cc5458
commit
f9bf86927d
|
@ -810,7 +810,9 @@ mod tests {
|
||||||
let mem_table = n.table_data("mem").unwrap();
|
let mem_table = n.table_data("mem").unwrap();
|
||||||
assert!(n.table_data("mem").is_some());
|
assert!(n.table_data("mem").is_some());
|
||||||
let mem_table = mem_table.write().await;
|
let mem_table = mem_table.write().await;
|
||||||
let p = mem_table.partition_data.get(&"1970-01-01".into()).unwrap();
|
let p = mem_table
|
||||||
|
.get_partition_by_key(&"1970-01-01".into())
|
||||||
|
.unwrap();
|
||||||
p.partition_id()
|
p.partition_id()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -952,7 +954,9 @@ mod tests {
|
||||||
let mem_table = n.table_data("mem").unwrap();
|
let mem_table = n.table_data("mem").unwrap();
|
||||||
assert!(n.table_data("cpu").is_some());
|
assert!(n.table_data("cpu").is_some());
|
||||||
let mem_table = mem_table.write().await;
|
let mem_table = mem_table.write().await;
|
||||||
let p = mem_table.partition_data.get(&"1970-01-01".into()).unwrap();
|
let p = mem_table
|
||||||
|
.get_partition_by_key(&"1970-01-01".into())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
table_id = mem_table.table_id();
|
table_id = mem_table.table_id();
|
||||||
partition_id = p.partition_id();
|
partition_id = p.partition_id();
|
||||||
|
@ -1352,7 +1356,7 @@ mod tests {
|
||||||
{
|
{
|
||||||
let table_data = data.table_data("mem").unwrap();
|
let table_data = data.table_data("mem").unwrap();
|
||||||
let table = table_data.read().await;
|
let table = table_data.read().await;
|
||||||
let p = table.partition_data.get(&"1970-01-01".into()).unwrap();
|
let p = table.get_partition_by_key(&"1970-01-01".into()).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
p.max_persisted_sequence_number(),
|
p.max_persisted_sequence_number(),
|
||||||
Some(SequenceNumber::new(1))
|
Some(SequenceNumber::new(1))
|
||||||
|
@ -1368,7 +1372,7 @@ mod tests {
|
||||||
|
|
||||||
let table_data = data.table_data("mem").unwrap();
|
let table_data = data.table_data("mem").unwrap();
|
||||||
let table = table_data.read().await;
|
let table = table_data.read().await;
|
||||||
let partition = table.partition_data.get(&"1970-01-01".into()).unwrap();
|
let partition = table.get_partition_by_key(&"1970-01-01".into()).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
partition.data.buffer.as_ref().unwrap().min_sequence_number,
|
partition.data.buffer.as_ref().unwrap().min_sequence_number,
|
||||||
SequenceNumber::new(2)
|
SequenceNumber::new(2)
|
||||||
|
|
|
@ -226,7 +226,7 @@ impl NamespaceData {
|
||||||
if let Some(t) = self.table_data(table_name) {
|
if let Some(t) = self.table_data(table_name) {
|
||||||
let mut t = t.write().await;
|
let mut t = t.write().await;
|
||||||
|
|
||||||
return t.partition_data.get_mut(partition_key).map(|p| {
|
return t.get_partition_by_key_mut(partition_key).map(|p| {
|
||||||
p.data
|
p.data
|
||||||
.generate_snapshot()
|
.generate_snapshot()
|
||||||
.expect("snapshot on mutable batch should never fail");
|
.expect("snapshot on mutable batch should never fail");
|
||||||
|
@ -249,8 +249,7 @@ impl NamespaceData {
|
||||||
let mut table_data = table_data.write().await;
|
let mut table_data = table_data.write().await;
|
||||||
|
|
||||||
return table_data
|
return table_data
|
||||||
.partition_data
|
.get_partition_by_key_mut(partition_key)
|
||||||
.get_mut(partition_key)
|
|
||||||
.and_then(|partition_data| partition_data.snapshot_to_persisting_batch());
|
.and_then(|partition_data| partition_data.snapshot_to_persisting_batch());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,7 +320,7 @@ impl NamespaceData {
|
||||||
) {
|
) {
|
||||||
if let Some(t) = self.table_data(table_name) {
|
if let Some(t) = self.table_data(table_name) {
|
||||||
let mut t = t.write().await;
|
let mut t = t.write().await;
|
||||||
let partition = t.partition_data.get_mut(partition_key);
|
let partition = t.get_partition_by_key_mut(partition_key);
|
||||||
|
|
||||||
if let Some(p) = partition {
|
if let Some(p) = partition {
|
||||||
p.mark_persisted(sequence_number);
|
p.mark_persisted(sequence_number);
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
//! Table level data buffer structures.
|
//! Table level data buffer structures.
|
||||||
|
|
||||||
use std::{collections::BTreeMap, sync::Arc};
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
use data_types::{
|
use data_types::{
|
||||||
DeletePredicate, NamespaceId, PartitionKey, SequenceNumber, ShardId, TableId, Timestamp,
|
DeletePredicate, NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId,
|
||||||
|
Timestamp,
|
||||||
};
|
};
|
||||||
use iox_catalog::interface::Catalog;
|
use iox_catalog::interface::Catalog;
|
||||||
use iox_query::exec::Executor;
|
use iox_query::exec::Executor;
|
||||||
|
@ -16,6 +17,39 @@ use super::partition::{
|
||||||
};
|
};
|
||||||
use crate::lifecycle::LifecycleHandle;
|
use crate::lifecycle::LifecycleHandle;
|
||||||
|
|
||||||
|
/// A double-referenced map where [`PartitionData`] can be looked up by
|
||||||
|
/// [`PartitionKey`], or ID.
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
struct DoubleRef {
|
||||||
|
// TODO(4880): this can be removed when IDs are sent over the wire.
|
||||||
|
by_key: HashMap<PartitionKey, PartitionData>,
|
||||||
|
by_id: HashMap<PartitionId, PartitionKey>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DoubleRef {
|
||||||
|
fn insert(&mut self, ns: PartitionData) {
|
||||||
|
let id = ns.partition_id();
|
||||||
|
let key = ns.partition_key().clone();
|
||||||
|
|
||||||
|
assert!(self.by_key.insert(key.clone(), ns).is_none());
|
||||||
|
assert!(self.by_id.insert(id, key).is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
fn by_key(&self, key: &PartitionKey) -> Option<&PartitionData> {
|
||||||
|
self.by_key.get(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn by_key_mut(&mut self, key: &PartitionKey) -> Option<&mut PartitionData> {
|
||||||
|
self.by_key.get_mut(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn by_id_mut(&mut self, id: PartitionId) -> Option<&mut PartitionData> {
|
||||||
|
let key = self.by_id.get(&id)?.clone();
|
||||||
|
self.by_key_mut(&key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Data of a Table in a given Namesapce that belongs to a given Shard
|
/// Data of a Table in a given Namesapce that belongs to a given Shard
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct TableData {
|
pub(crate) struct TableData {
|
||||||
|
@ -34,8 +68,8 @@ pub(crate) struct TableData {
|
||||||
/// `(key, shard, table)` triplet.
|
/// `(key, shard, table)` triplet.
|
||||||
partition_provider: Arc<dyn PartitionProvider>,
|
partition_provider: Arc<dyn PartitionProvider>,
|
||||||
|
|
||||||
// Map pf partition key to its data
|
// Map of partition key to its data
|
||||||
pub(super) partition_data: BTreeMap<PartitionKey, PartitionData>,
|
partition_data: DoubleRef,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TableData {
|
impl TableData {
|
||||||
|
@ -71,6 +105,7 @@ impl TableData {
|
||||||
/// Return parquet_max_sequence_number
|
/// Return parquet_max_sequence_number
|
||||||
pub(super) fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
|
pub(super) fn parquet_max_sequence_number(&self) -> Option<SequenceNumber> {
|
||||||
self.partition_data
|
self.partition_data
|
||||||
|
.by_key
|
||||||
.values()
|
.values()
|
||||||
.map(|p| p.max_persisted_sequence_number())
|
.map(|p| p.max_persisted_sequence_number())
|
||||||
.max()
|
.max()
|
||||||
|
@ -92,7 +127,7 @@ impl TableData {
|
||||||
partition_key: PartitionKey,
|
partition_key: PartitionKey,
|
||||||
lifecycle_handle: &dyn LifecycleHandle,
|
lifecycle_handle: &dyn LifecycleHandle,
|
||||||
) -> Result<bool, super::Error> {
|
) -> Result<bool, super::Error> {
|
||||||
let partition_data = match self.partition_data.get_mut(&partition_key) {
|
let partition_data = match self.partition_data.by_key.get_mut(&partition_key) {
|
||||||
Some(p) => p,
|
Some(p) => p,
|
||||||
None => {
|
None => {
|
||||||
let p = self
|
let p = self
|
||||||
|
@ -105,12 +140,9 @@ impl TableData {
|
||||||
Arc::clone(&self.table_name),
|
Arc::clone(&self.table_name),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
// Add the partition to the map.
|
// Add the double-referenced partition to the map.
|
||||||
assert!(self
|
self.partition_data.insert(p);
|
||||||
.partition_data
|
self.partition_data.by_key_mut(&partition_key).unwrap()
|
||||||
.insert(partition_key.clone(), p)
|
|
||||||
.is_none());
|
|
||||||
self.partition_data.get_mut(&partition_key).unwrap()
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -171,15 +203,42 @@ impl TableData {
|
||||||
self.tombstone_max_sequence_number = Some(sequence_number);
|
self.tombstone_max_sequence_number = Some(sequence_number);
|
||||||
|
|
||||||
// modify one partition at a time
|
// modify one partition at a time
|
||||||
for data in self.partition_data.values_mut() {
|
for data in self.partition_data.by_key.values_mut() {
|
||||||
data.buffer_tombstone(executor, tombstone.clone()).await;
|
data.buffer_tombstone(executor, tombstone.clone()).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the [`PartitionData`] for the specified ID.
|
||||||
|
#[allow(unused)]
|
||||||
|
pub(crate) fn get_partition(
|
||||||
|
&mut self,
|
||||||
|
partition_id: PartitionId,
|
||||||
|
) -> Option<&mut PartitionData> {
|
||||||
|
self.partition_data.by_id_mut(partition_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the [`PartitionData`] for the specified partition key.
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn get_partition_by_key(
|
||||||
|
&self,
|
||||||
|
partition_key: &PartitionKey,
|
||||||
|
) -> Option<&PartitionData> {
|
||||||
|
self.partition_data.by_key(partition_key)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the [`PartitionData`] for the specified partition key.
|
||||||
|
pub(crate) fn get_partition_by_key_mut(
|
||||||
|
&mut self,
|
||||||
|
partition_key: &PartitionKey,
|
||||||
|
) -> Option<&mut PartitionData> {
|
||||||
|
self.partition_data.by_key_mut(partition_key)
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn unpersisted_partition_data(&self) -> Vec<UnpersistedPartitionData> {
|
pub(crate) fn unpersisted_partition_data(&self) -> Vec<UnpersistedPartitionData> {
|
||||||
self.partition_data
|
self.partition_data
|
||||||
|
.by_key
|
||||||
.values()
|
.values()
|
||||||
.map(|p| UnpersistedPartitionData {
|
.map(|p| UnpersistedPartitionData {
|
||||||
partition_id: p.partition_id(),
|
partition_id: p.partition_id(),
|
||||||
|
@ -204,6 +263,7 @@ impl TableData {
|
||||||
};
|
};
|
||||||
|
|
||||||
self.partition_data
|
self.partition_data
|
||||||
|
.by_key
|
||||||
.values()
|
.values()
|
||||||
.fold(progress, |progress, partition_data| {
|
.fold(progress, |progress, partition_data| {
|
||||||
progress.combine(partition_data.progress())
|
progress.combine(partition_data.progress())
|
||||||
|
@ -248,6 +308,65 @@ mod tests {
|
||||||
const PARTITION_KEY: &str = "platanos";
|
const PARTITION_KEY: &str = "platanos";
|
||||||
const PARTITION_ID: PartitionId = PartitionId::new(0);
|
const PARTITION_ID: PartitionId = PartitionId::new(0);
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_partition_double_ref() {
|
||||||
|
let metrics = Arc::new(metric::Registry::default());
|
||||||
|
let catalog: Arc<dyn Catalog> =
|
||||||
|
Arc::new(iox_catalog::mem::MemCatalog::new(Arc::clone(&metrics)));
|
||||||
|
|
||||||
|
// Populate the catalog with the shard / namespace / table
|
||||||
|
let (shard_id, ns_id, table_id) =
|
||||||
|
populate_catalog(&*catalog, SHARD_INDEX, NAMESPACE_NAME, TABLE_NAME).await;
|
||||||
|
|
||||||
|
// Configure the mock partition provider to return a partition for this
|
||||||
|
// table ID.
|
||||||
|
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
|
||||||
|
PartitionData::new(
|
||||||
|
PARTITION_ID,
|
||||||
|
PARTITION_KEY.into(),
|
||||||
|
shard_id,
|
||||||
|
ns_id,
|
||||||
|
table_id,
|
||||||
|
TABLE_NAME.into(),
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
));
|
||||||
|
|
||||||
|
let mut table = TableData::new(
|
||||||
|
table_id,
|
||||||
|
TABLE_NAME,
|
||||||
|
shard_id,
|
||||||
|
ns_id,
|
||||||
|
None,
|
||||||
|
partition_provider,
|
||||||
|
);
|
||||||
|
|
||||||
|
let batch = lines_to_batches(r#"bananas,bat=man value=24 42"#, 0)
|
||||||
|
.unwrap()
|
||||||
|
.remove(TABLE_NAME)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Assert the table does not contain the test partition
|
||||||
|
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_none());
|
||||||
|
assert!(table.partition_data.by_id_mut(PARTITION_ID).is_none());
|
||||||
|
|
||||||
|
// Write some test data
|
||||||
|
let pause = table
|
||||||
|
.buffer_table_write(
|
||||||
|
SequenceNumber::new(42),
|
||||||
|
batch,
|
||||||
|
PARTITION_KEY.into(),
|
||||||
|
&MockLifecycleHandle::default(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("buffer op should succeed");
|
||||||
|
assert!(!pause);
|
||||||
|
|
||||||
|
// Referencing the partition should succeed
|
||||||
|
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_some());
|
||||||
|
assert!(table.partition_data.by_id_mut(PARTITION_ID).is_some());
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_bad_write_memory_counting() {
|
async fn test_bad_write_memory_counting() {
|
||||||
let metrics = Arc::new(metric::Registry::default());
|
let metrics = Arc::new(metric::Registry::default());
|
||||||
|
@ -291,7 +410,7 @@ mod tests {
|
||||||
let handle = MockLifecycleHandle::default();
|
let handle = MockLifecycleHandle::default();
|
||||||
|
|
||||||
// Assert the table does not contain the test partition
|
// Assert the table does not contain the test partition
|
||||||
assert!(table.partition_data.get(&PARTITION_KEY.into()).is_none());
|
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_none());
|
||||||
|
|
||||||
// Write some test data
|
// Write some test data
|
||||||
let pause = table
|
let pause = table
|
||||||
|
@ -306,7 +425,7 @@ mod tests {
|
||||||
assert!(!pause);
|
assert!(!pause);
|
||||||
|
|
||||||
// Referencing the partition should succeed
|
// Referencing the partition should succeed
|
||||||
assert!(table.partition_data.get(&PARTITION_KEY.into()).is_some());
|
assert!(table.partition_data.by_key(&PARTITION_KEY.into()).is_some());
|
||||||
|
|
||||||
// And the lifecycle handle was called with the expected values
|
// And the lifecycle handle was called with the expected values
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
Loading…
Reference in New Issue