feat: reduce lifecycle lock scope (#2242) (#2810)

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
Raphael Taylor-Davies 2021-10-12 18:34:16 +01:00 committed by GitHub
parent 974a64126a
commit 5b69bb0d72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 85 additions and 71 deletions

View File

@ -439,7 +439,7 @@ where
/// The core policy logic
///
/// Returns a future that resolves when this method should be called next
pub fn check_for_work(&mut self, now: DateTime<Utc>) -> BoxFuture<'_, ()> {
pub fn check_for_work(&mut self, now: DateTime<Utc>) -> BoxFuture<'static, ()> {
// Any time-consuming work should be spawned as tokio tasks and not
// run directly within this loop

View File

@ -1133,7 +1133,7 @@ impl DatabaseStateCatalogLoaded {
let db = Arc::clone(&self.db);
// TODO: Pull write buffer and lifecycle out of Db
db.unsuppress_persistence().await;
db.unsuppress_persistence();
let rules = self.provided_rules.rules();
let write_buffer_factory = shared.application.write_buffer_factory();

View File

@ -298,7 +298,7 @@ pub struct Db {
/// This is stored here for the following reasons:
/// - to control the persistence suppression via a [`Db::unsuppress_persistence`]
/// - to keep the lifecycle state (e.g. the number of running compactions) around
lifecycle_policy: tokio::sync::Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>,
lifecycle_policy: Mutex<Option<::lifecycle::LifecyclePolicy<WeakDb>>>,
time_provider: Arc<dyn TimeProvider>,
@ -360,7 +360,7 @@ impl Db {
worker_iterations_delete_predicate_preservation: AtomicUsize::new(0),
write_buffer_producer: database_to_commit.write_buffer_producer,
cleanup_lock: Default::default(),
lifecycle_policy: tokio::sync::Mutex::new(None),
lifecycle_policy: Mutex::new(None),
time_provider: database_to_commit.time_provider,
delete_predicates_mailbox: Default::default(),
persisted_chunk_id_override: Default::default(),
@ -378,8 +378,8 @@ impl Db {
}
/// Allow persistence if the database rules allow it.
pub async fn unsuppress_persistence(&self) {
let mut guard = self.lifecycle_policy.lock().await;
pub fn unsuppress_persistence(&self) {
let mut guard = self.lifecycle_policy.lock();
let policy = guard
.as_mut()
.expect("lifecycle policy should be initialized");
@ -819,15 +819,16 @@ impl Db {
loop {
self.worker_iterations_lifecycle
.fetch_add(1, Ordering::Relaxed);
let mut guard = self.lifecycle_policy.lock().await;
let policy = guard
.as_mut()
.expect("lifecycle policy should be initialized");
let fut = {
let mut guard = self.lifecycle_policy.lock();
let policy = guard
.as_mut()
.expect("lifecycle policy should be initialized");
policy
.check_for_work(self.time_provider.now().date_time())
.await
policy.check_for_work(self.time_provider.now().date_time())
};
fut.await
}
};

View File

@ -711,7 +711,7 @@ mod tests {
}
let db = &test_db.db;
db.unsuppress_persistence().await;
db.unsuppress_persistence();
// wait until checks pass
let t_0 = Instant::now();

View File

@ -116,6 +116,58 @@ struct TrackerState {
notify: Notify,
}
impl TrackerState {
fn get_status(&self) -> TaskStatus {
// The atomic decrement in TrackerRegistration::drop has release semantics
// acquire here ensures that if a thread observes the tracker to have
// no pending_registrations it cannot subsequently observe pending_futures
// to increase. If it could, observing pending_futures==0 would be insufficient
// to conclude there are no outstanding futures
let pending_registrations = self.pending_registrations.load(Ordering::Acquire);
// The atomic decrement in TrackedFuture::drop has release semantics
// acquire therefore ensures that if a thread observes the completion of
// a TrackedFuture, it is guaranteed to see its updates (e.g. wall_nanos)
let pending_futures = self.pending_futures.load(Ordering::Acquire);
match (pending_registrations == 0, pending_futures == 0) {
(false, _) => TaskStatus::Creating,
(true, false) => TaskStatus::Running {
total_count: self.created_futures.load(Ordering::Relaxed),
pending_count: self.pending_futures.load(Ordering::Relaxed),
cpu_nanos: self.cpu_nanos.load(Ordering::Relaxed),
},
(true, true) => {
let total_count = self.created_futures.load(Ordering::Relaxed);
let success_count = self.ok_futures.load(Ordering::Relaxed);
let error_count = self.err_futures.load(Ordering::Relaxed);
let cancelled_count = self.cancelled_futures.load(Ordering::Relaxed);
// Failure of this would imply a future reported its completion status multiple
// times or a future was created without incrementing created_futures.
// Both of these should be impossible
let dropped_count = total_count
.checked_sub(success_count + error_count + cancelled_count)
.expect("invalid tracker state");
TaskStatus::Complete {
total_count,
success_count,
error_count,
cancelled_count,
dropped_count,
cpu_nanos: self.cpu_nanos.load(Ordering::Relaxed),
wall_nanos: self.wall_nanos.load(Ordering::Relaxed),
}
}
}
}
fn is_complete(&self) -> bool {
matches!(self.get_status(), TaskStatus::Complete { .. })
}
}
/// Returns a summary of the task execution
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum TaskResult {
@ -323,54 +375,12 @@ where
/// Returns true if all futures associated with this tracker have
/// been dropped and no more can be created
pub fn is_complete(&self) -> bool {
matches!(self.get_status(), TaskStatus::Complete { .. })
self.state.is_complete()
}
/// Gets the status of the tracker
pub fn get_status(&self) -> TaskStatus {
// The atomic decrement in TrackerRegistration::drop has release semantics
// acquire here ensures that if a thread observes the tracker to have
// no pending_registrations it cannot subsequently observe pending_futures
// to increase. If it could, observing pending_futures==0 would be insufficient
// to conclude there are no outstanding futures
let pending_registrations = self.state.pending_registrations.load(Ordering::Acquire);
// The atomic decrement in TrackedFuture::drop has release semantics
// acquire therefore ensures that if a thread observes the completion of
// a TrackedFuture, it is guaranteed to see its updates (e.g. wall_nanos)
let pending_futures = self.state.pending_futures.load(Ordering::Acquire);
match (pending_registrations == 0, pending_futures == 0) {
(false, _) => TaskStatus::Creating,
(true, false) => TaskStatus::Running {
total_count: self.state.created_futures.load(Ordering::Relaxed),
pending_count: self.state.pending_futures.load(Ordering::Relaxed),
cpu_nanos: self.state.cpu_nanos.load(Ordering::Relaxed),
},
(true, true) => {
let total_count = self.state.created_futures.load(Ordering::Relaxed);
let success_count = self.state.ok_futures.load(Ordering::Relaxed);
let error_count = self.state.err_futures.load(Ordering::Relaxed);
let cancelled_count = self.state.cancelled_futures.load(Ordering::Relaxed);
// Failure of this would imply a future reported its completion status multiple
// times or a future was created without incrementing created_futures.
// Both of these should be impossible
let dropped_count = total_count
.checked_sub(success_count + error_count + cancelled_count)
.expect("invalid tracker state");
TaskStatus::Complete {
total_count,
success_count,
error_count,
cancelled_count,
dropped_count,
cpu_nanos: self.state.cpu_nanos.load(Ordering::Relaxed),
wall_nanos: self.state.wall_nanos.load(Ordering::Relaxed),
}
}
}
self.state.get_status()
}
/// Returns the instant the tracker was created
@ -385,21 +395,24 @@ where
/// Blocks until all futures associated with the tracker have been
/// dropped and no more can be created
pub async fn join(&self) {
// Notify is notified when pending_futures hits 0 AND when pending_registrations
// hits 0. In almost all cases join won't be called before pending_registrations
// has already hit 0, but in the extremely rare case this occurs the loop
// handles the spurious wakeup
loop {
// Request notification before checking if complete
// to avoid a race condition
let notify = self.state.notify.notified();
pub fn join(&self) -> impl std::future::Future<Output = ()> {
let state = Arc::clone(&self.state);
async move {
// Notify is notified when pending_futures hits 0 AND when pending_registrations
// hits 0. In almost all cases join won't be called before pending_registrations
// has already hit 0, but in the extremely rare case this occurs the loop
// handles the spurious wakeup
loop {
// Request notification before checking if complete
// to avoid a race condition
let notify = state.notify.notified();
if self.is_complete() {
return;
if state.is_complete() {
return;
}
notify.await
}
notify.await
}
}
}