feat: persist real (non-fake) part.+DB checkpoints
parent
8276511bd3
commit
2b0a4bbe0a
|
@ -471,8 +471,15 @@ impl Db {
|
||||||
chunk_id: u32,
|
chunk_id: u32,
|
||||||
) -> Result<Arc<DbChunk>> {
|
) -> Result<Arc<DbChunk>> {
|
||||||
let chunk = self.lockable_chunk(table_name, partition_key, chunk_id)?;
|
let chunk = self.lockable_chunk(table_name, partition_key, chunk_id)?;
|
||||||
let (_, fut) =
|
let partition = self.partition(table_name, partition_key)?;
|
||||||
lifecycle::write_chunk_to_object_store(chunk.write()).context(LifecycleError)?;
|
let (partition_checkpoint, database_checkpoint) =
|
||||||
|
lifecycle::collect_checkpoints(&partition, partition_key, table_name, &self.catalog);
|
||||||
|
let (_, fut) = lifecycle::write_chunk_to_object_store(
|
||||||
|
chunk.write(),
|
||||||
|
partition_checkpoint,
|
||||||
|
database_checkpoint,
|
||||||
|
)
|
||||||
|
.context(LifecycleError)?;
|
||||||
fut.await.context(TaskCancelled)?.context(LifecycleError)
|
fut.await.context(TaskCancelled)?.context(LifecycleError)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ pub(crate) use error::{Error, Result};
|
||||||
pub(crate) use move_chunk::move_chunk_to_read_buffer;
|
pub(crate) use move_chunk::move_chunk_to_read_buffer;
|
||||||
use persistence_windows::persistence_windows::FlushHandle;
|
use persistence_windows::persistence_windows::FlushHandle;
|
||||||
pub(crate) use unload::unload_read_buffer_chunk;
|
pub(crate) use unload::unload_read_buffer_chunk;
|
||||||
pub(crate) use write::write_chunk_to_object_store;
|
pub(crate) use write::{collect_checkpoints, write_chunk_to_object_store};
|
||||||
|
|
||||||
use super::DbChunk;
|
use super::DbChunk;
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt};
|
||||||
|
|
||||||
use crate::db::catalog::chunk::CatalogChunk;
|
use crate::db::catalog::chunk::CatalogChunk;
|
||||||
use crate::db::catalog::partition::Partition;
|
use crate::db::catalog::partition::Partition;
|
||||||
|
use crate::db::lifecycle::write::collect_checkpoints;
|
||||||
use crate::db::lifecycle::{
|
use crate::db::lifecycle::{
|
||||||
collect_rub, merge_schemas, new_rub_chunk, write_chunk_to_object_store,
|
collect_rub, merge_schemas, new_rub_chunk, write_chunk_to_object_store,
|
||||||
};
|
};
|
||||||
|
@ -103,6 +104,8 @@ pub(super) fn persist_chunks(
|
||||||
|
|
||||||
let persisted_rows = to_persist.rows();
|
let persisted_rows = to_persist.rows();
|
||||||
let remainder_rows = remainder.rows();
|
let remainder_rows = remainder.rows();
|
||||||
|
let (partition_checkpoint, database_checkpoint) =
|
||||||
|
collect_checkpoints(&partition, &partition_key, &table_name, &db.catalog);
|
||||||
|
|
||||||
let persist_fut = {
|
let persist_fut = {
|
||||||
let mut partition = partition.write();
|
let mut partition = partition.write();
|
||||||
|
@ -126,7 +129,7 @@ pub(super) fn persist_chunks(
|
||||||
// Drop partition lock guard after locking chunk
|
// Drop partition lock guard after locking chunk
|
||||||
std::mem::drop(partition);
|
std::mem::drop(partition);
|
||||||
|
|
||||||
write_chunk_to_object_store(to_persist)?.1
|
write_chunk_to_object_store(to_persist, partition_checkpoint, database_checkpoint)?.1
|
||||||
};
|
};
|
||||||
|
|
||||||
// Wait for write operation to complete
|
// Wait for write operation to complete
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
//! This module contains the code to write chunks to the object store
|
//! This module contains the code to write chunks to the object store
|
||||||
use crate::db::{
|
use crate::db::{
|
||||||
catalog::chunk::{CatalogChunk, ChunkStage},
|
catalog::{
|
||||||
|
chunk::{CatalogChunk, ChunkStage},
|
||||||
|
partition::Partition,
|
||||||
|
Catalog,
|
||||||
|
},
|
||||||
checkpoint_data_from_catalog,
|
checkpoint_data_from_catalog,
|
||||||
lifecycle::LockableCatalogChunk,
|
lifecycle::LockableCatalogChunk,
|
||||||
DbChunk,
|
DbChunk,
|
||||||
|
@ -24,8 +28,8 @@ use persistence_windows::checkpoint::{
|
||||||
};
|
};
|
||||||
use query::QueryChunk;
|
use query::QueryChunk;
|
||||||
use snafu::ResultExt;
|
use snafu::ResultExt;
|
||||||
use std::{collections::BTreeMap, future::Future, sync::Arc};
|
use std::{future::Future, sync::Arc};
|
||||||
use tracker::{TaskTracker, TrackedFuture, TrackedFutureExt};
|
use tracker::{RwLock, TaskTracker, TrackedFuture, TrackedFutureExt};
|
||||||
|
|
||||||
use super::error::{
|
use super::error::{
|
||||||
CommitError, Error, ParquetChunkError, Result, TransactionError, WritingToObjectStore,
|
CommitError, Error, ParquetChunkError, Result, TransactionError, WritingToObjectStore,
|
||||||
|
@ -37,6 +41,8 @@ use super::error::{
|
||||||
/// The caller can either spawn this future to tokio, or block directly on it
|
/// The caller can either spawn this future to tokio, or block directly on it
|
||||||
pub fn write_chunk_to_object_store(
|
pub fn write_chunk_to_object_store(
|
||||||
mut guard: LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>,
|
mut guard: LifecycleWriteGuard<'_, CatalogChunk, LockableCatalogChunk>,
|
||||||
|
partition_checkpoint: PartitionCheckpoint,
|
||||||
|
database_checkpoint: DatabaseCheckpoint,
|
||||||
) -> Result<(
|
) -> Result<(
|
||||||
TaskTracker<Job>,
|
TaskTracker<Job>,
|
||||||
TrackedFuture<impl Future<Output = Result<Arc<DbChunk>>> + Send>,
|
TrackedFuture<impl Future<Output = Result<Arc<DbChunk>>> + Send>,
|
||||||
|
@ -101,11 +107,6 @@ pub fn write_chunk_to_object_store(
|
||||||
//
|
//
|
||||||
// IMPORTANT: Writing must take place while holding the cleanup lock, otherwise the file might be deleted
|
// IMPORTANT: Writing must take place while holding the cleanup lock, otherwise the file might be deleted
|
||||||
// between creation and the transaction commit.
|
// between creation and the transaction commit.
|
||||||
let (partition_checkpoint, database_checkpoint) =
|
|
||||||
fake_partition_and_database_checkpoint(
|
|
||||||
Arc::clone(&addr.table_name),
|
|
||||||
Arc::clone(&addr.partition_key),
|
|
||||||
);
|
|
||||||
let metadata = IoxMetadata {
|
let metadata = IoxMetadata {
|
||||||
creation_timestamp: Utc::now(),
|
creation_timestamp: Utc::now(),
|
||||||
table_name: Arc::clone(&addr.table_name),
|
table_name: Arc::clone(&addr.table_name),
|
||||||
|
@ -189,22 +190,37 @@ pub fn write_chunk_to_object_store(
|
||||||
Ok((tracker, fut.track(registration)))
|
Ok((tracker, fut.track(registration)))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fake until we have the split implementation in-place.
|
/// Construct partition and database checkpoint for the given partition in the given catalog.
|
||||||
fn fake_partition_and_database_checkpoint(
|
pub fn collect_checkpoints(
|
||||||
table_name: Arc<str>,
|
partition: &RwLock<Partition>,
|
||||||
partition_key: Arc<str>,
|
partition_key: &str,
|
||||||
|
table_name: &str,
|
||||||
|
catalog: &Catalog,
|
||||||
) -> (PartitionCheckpoint, DatabaseCheckpoint) {
|
) -> (PartitionCheckpoint, DatabaseCheckpoint) {
|
||||||
// create partition checkpoint
|
// calculate checkpoint
|
||||||
let sequencer_numbers = BTreeMap::new();
|
let mut checkpoint_builder = {
|
||||||
let min_unpersisted_timestamp = Utc::now();
|
// Record partition checkpoint and then flush persisted data from persistence windows.
|
||||||
let partition_checkpoint = PartitionCheckpoint::new(
|
let partition = partition.read();
|
||||||
table_name,
|
PersistCheckpointBuilder::new(
|
||||||
partition_key,
|
partition
|
||||||
sequencer_numbers,
|
.partition_checkpoint()
|
||||||
min_unpersisted_timestamp,
|
.expect("persistence window removed"),
|
||||||
);
|
)
|
||||||
|
};
|
||||||
|
|
||||||
// build database checkpoint
|
// collect checkpoints of all other partitions
|
||||||
let builder = PersistCheckpointBuilder::new(partition_checkpoint);
|
if let Ok(table) = catalog.table(table_name) {
|
||||||
builder.build()
|
for partition in table.partitions() {
|
||||||
|
let partition = partition.read();
|
||||||
|
if partition.key() == partition_key {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(partition_checkpoint) = partition.partition_checkpoint() {
|
||||||
|
checkpoint_builder.register_other_partition(&partition_checkpoint);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
checkpoint_builder.build()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue