From d28c18da9dd905316a30267305f0a5d656e65ec4 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 19 Jul 2013 15:56:52 -0700 Subject: [PATCH] OSD::RemoveWQ: do not apply_transaction while blocking _try_resurrect_pg Some callbacks take the osd lock, so we need to avoid blocking an osd lock holding thread while waiting on a filestore callback. Instead, just queue the transaction, and allow _try_resurrect_pg to cancel us while we are waiting for the transaction to go through (CLEARING_WAITING). Fixes: #5672 Signed-off-by: Samuel Just Reviewed-by: Sage Weil --- src/osd/OSD.cc | 21 +++++++++++++++------ src/osd/OSD.h | 40 +++++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 1ee4c09a63e1f..8cc9e31459d61 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3267,21 +3267,30 @@ bool remove_dir( } t->remove(coll, *i); if (num >= g_conf->osd_target_transaction_size) { - store->apply_transaction(osr, *t); + C_SaferCond waiter; + store->queue_transaction(osr, t, &waiter); + bool cont = dstate->pause_clearing(); + waiter.wait(); + if (cont) + cont = dstate->resume_clearing(); delete t; - if (!dstate->check_canceled()) { - // canceled! + if (!cont) return false; - } t = new ObjectStore::Transaction; num = 0; } } olist.clear(); } - store->apply_transaction(osr, *t); + + C_SaferCond waiter; + store->queue_transaction(osr, t, &waiter); + bool cont = dstate->pause_clearing(); + waiter.wait(); + if (cont) + cont = dstate->resume_clearing(); delete t; - return true; + return cont; } void OSD::RemoveWQ::_process(pair item) diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 238c5b4359486..04ad4dcd7d7e8 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -148,6 +148,7 @@ class DeletingState { enum { QUEUED, CLEARING_DIR, + CLEARING_WAITING, DELETING_DIR, DELETED_DIR, CANCELED, @@ -160,8 +161,23 @@ public: lock("DeletingState::lock"), status(QUEUED), stop_deleting(false), pgid(in.first), old_pg_state(in.second) {} - /// check whether removal was canceled - bool check_canceled() { + /// transition status to clearing + bool start_clearing() { + Mutex::Locker l(lock); + assert( + status == QUEUED || + status == DELETED_DIR); + if (stop_deleting) { + status = CANCELED; + cond.Signal(); + return false; + } + status = CLEARING_DIR; + return true; + } ///< @return false if we should cancel deletion + + /// transition status to CLEARING_WAITING + bool pause_clearing() { Mutex::Locker l(lock); assert(status == CLEARING_DIR); if (stop_deleting) { @@ -169,15 +185,14 @@ public: cond.Signal(); return false; } + status = CLEARING_WAITING; return true; - } ///< @return false if canceled, true if we should continue + } ///< @return false if we should cancel deletion - /// transition status to clearing - bool start_clearing() { + /// transition status to CLEARING_DIR + bool resume_clearing() { Mutex::Locker l(lock); - assert( - status == QUEUED || - status == DELETED_DIR); + assert(status == CLEARING_WAITING); if (stop_deleting) { status = CANCELED; cond.Signal(); @@ -215,11 +230,10 @@ public: /** * If we are in DELETING_DIR or CLEARING_DIR, there are in progress * operations we have to wait for before continuing on. States - * DELETED_DIR, QUEUED, and CANCELED either check for stop_deleting - * prior to performing any operations or signify the end of the - * deleting process. We don't want to wait to leave the QUEUED - * state, because this might block the caller behind an entire pg - * removal. + * CLEARING_WAITING and QUEUED indicate that the remover will check + * stop_deleting before queueing any further operations. CANCELED + * indicates that the remover has already halted. DELETED_DIR + * indicates that the deletion has been fully queueud. */ while (status == DELETING_DIR || status == CLEARING_DIR) cond.Wait(lock); -- 2.39.5