From: Jason Dillaman Date: Fri, 14 Aug 2015 17:30:41 +0000 (-0400) Subject: librbd: possible deadlock attempting to drain parent image WQs X-Git-Tag: v10.0.1~52^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cb634dfa42a192155440a5457f43b574cffdafec;p=ceph.git librbd: possible deadlock attempting to drain parent image WQs Ensure all AIO to the parent image is properly flushed and assert that all work queues are empty before closing the parent image. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/AioImageRequestWQ.h b/src/librbd/AioImageRequestWQ.h index d269fdbd0f8e..20169f590022 100644 --- a/src/librbd/AioImageRequestWQ.h +++ b/src/librbd/AioImageRequestWQ.h @@ -32,6 +32,7 @@ public: void aio_flush(AioCompletion *c); using ThreadPool::PointerWQ::drain; + using ThreadPool::PointerWQ::empty; inline bool writes_empty() const { Mutex::Locker locker(m_lock); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 3a88b6ab78be..d4e1e6ed8169 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2042,8 +2042,7 @@ reprotect_and_return_err: lderr(ictx->cct) << "parent snapshot does not exist" << dendl; ictx->parent->snap_lock.put_write(); ictx->parent->cache_lock.Unlock(); - close_image(ictx->parent); - ictx->parent = NULL; + close_parent(ictx); return r; } ictx->parent->snap_set(ictx->parent->snap_name); @@ -2056,8 +2055,7 @@ reprotect_and_return_err: ictx->parent->parent_lock.put_write(); ictx->parent->snap_lock.put_write(); ictx->parent->cache_lock.Unlock(); - close_image(ictx->parent); - ictx->parent = NULL; + close_parent(ictx); return r; } ictx->parent->parent_lock.put_write(); @@ -2574,8 +2572,7 @@ reprotect_and_return_err: ictx->parent->id != ictx->get_parent_image_id(ictx->snap_id) || ictx->parent->snap_id != ictx->get_parent_snap_id(ictx->snap_id)) { ictx->clear_nonexistence_cache(); - close_image(ictx->parent); - ictx->parent = NULL; + close_parent(ictx); } } @@ -3169,8 +3166,10 @@ reprotect_and_return_err: { ldout(ictx->cct, 20) << "close_image " << ictx << dendl; - // finish all incoming IO operations - ictx->aio_work_queue->drain(); + if (!ictx->read_only) { + // finish all incoming IO operations + ictx->aio_work_queue->drain(); + } int r = 0; { @@ -3222,11 +3221,11 @@ reprotect_and_return_err: } if (ictx->parent) { - int close_r = close_image(ictx->parent); + RWLock::WLocker parent_locker(ictx->parent_lock); + int close_r = close_parent(ictx); if (r == 0 && close_r < 0) { r = close_r; } - ictx->parent = NULL; } if (ictx->image_watcher) { @@ -3237,6 +3236,28 @@ reprotect_and_return_err: return r; } + int close_parent(ImageCtx *ictx) + { + assert(ictx->parent_lock.is_wlocked()); + ImageCtx *parent_ictx = ictx->parent; + + // AIO to the parent must be complete before closing + parent_ictx->flush_async_operations(); + parent_ictx->readahead.wait_for_pending(); + { + Mutex::Locker async_ops_locker(parent_ictx->async_ops_lock); + assert(parent_ictx->async_ops.empty()); + } + + // attempting to drain the work queues might result in deadlock + assert(parent_ictx->aio_work_queue->empty()); + assert(parent_ictx->op_work_queue->empty()); + + int r = close_image(parent_ictx); + ictx->parent = NULL; + return r; + } + // 'flatten' child image by copying all parent's blocks int flatten(ImageCtx *ictx, ProgressContext &prog_ctx) { diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 634d2b337550..e94cc2b0accb 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -153,6 +153,7 @@ namespace librbd { int open_parent(ImageCtx *ictx); int open_image(ImageCtx *ictx); int close_image(ImageCtx *ictx); + int close_parent(ImageCtx *ictx); int flatten(ImageCtx *ictx, ProgressContext &prog_ctx);