From: Jason Dillaman Date: Wed, 21 Jan 2015 13:58:57 +0000 (-0500) Subject: librbd: fix copy-on-read / resize down race condition X-Git-Tag: v0.93~197^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bda293adc254d221f9d727f1b5ddfcfca8c6249d;p=ceph.git librbd: fix copy-on-read / resize down race condition There was a rare race condition between a pending CoR operation and a resize down operation resulting in a CoR copyup past the new, reduced parent overlap. This commit also adds additional log message details for CoR. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index 8cb627ac879..54039630276 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -172,9 +172,10 @@ namespace librbd { if (newlen != 0) { // create and kick off a CopyupRequest CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, true); + m_object_no, + m_image_extents); m_ictx->copyup_list[m_object_no] = new_req; - new_req->queue_read_from_parent(m_image_extents); + new_req->queue_read_from_parent(); } } } @@ -294,7 +295,8 @@ namespace librbd { if (it == m_ictx->copyup_list.end()) { // If it is not in the list, create a CopyupRequest and wait for it. CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, - m_object_no, false); + m_object_no, + m_object_image_extents); // make sure to wait on this CopyupRequest new_req->append_request(this); m_ictx->copyup_list[m_object_no] = new_req; @@ -303,7 +305,7 @@ namespace librbd { ldout(m_ictx->cct, 20) << __func__ << " creating new Copyup for AioWrite, obj-" << m_object_no << dendl; m_ictx->copyup_list_lock.Unlock(); - new_req->read_from_parent(m_object_image_extents); + new_req->read_from_parent(); ldout(m_ictx->cct, 20) << __func__ << " issuing read_from_parent" << dendl; } else { ldout(m_ictx->cct, 20) << __func__ << " someone is reading back from parent" << dendl; @@ -338,18 +340,14 @@ namespace librbd { // Read data from waiting list safely. If this AioWrite created a // CopyupRequest, m_read_data should be empty. - { - RWLock::RLocker l3(m_ictx->snap_lock); - if (is_copy_on_read(m_ictx, m_snap_id) && - m_read_data.is_zero()) { - Mutex::Locker l(m_ictx->copyup_list_lock); - ldout(m_ictx->cct, 20) << __func__ << " releasing self pending, obj-" - << m_object_no << dendl; - it = m_ictx->copyup_list.find(m_object_no); - assert(it != m_ictx->copyup_list.end()); - assert(m_entire_object); - m_read_data.append(*m_entire_object); - } + if (m_entire_object != NULL) { + assert(m_ictx->copyup_list_lock.is_locked()); + ldout(m_ictx->cct, 20) << __func__ << " releasing self pending, obj-" + << m_object_no << dendl; + assert(m_ictx->copyup_list.find(m_object_no) != + m_ictx->copyup_list.end()); + assert(m_read_data.length() == 0); + m_read_data.append(*m_entire_object); } send_copyup(); diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index 690d149d4fd..ab83b3ae836 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -18,8 +18,10 @@ namespace librbd { CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, - uint64_t objectno, bool send_copyup) - : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_send_copyup(send_copyup) + uint64_t objectno, + vector >& image_extents) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), + m_image_extents(image_extents) { } @@ -47,6 +49,7 @@ namespace librbd { } void CopyupRequest::append_request(AioRequest *req) { + ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl; m_pending_requests.push_back(req); } @@ -54,13 +57,17 @@ namespace librbd { while (!m_pending_requests.empty()) { vector::iterator it = m_pending_requests.begin(); AioRequest *req = *it; + ldout(m_ictx->cct, 20) << __func__ << " completing request " << req + << dendl; req->complete(r); m_pending_requests.erase(it); } } void CopyupRequest::send_copyup(int r) { - ldout(m_ictx->cct, 20) << __func__ << dendl; + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid + << ", r " << r << dendl; m_ictx->snap_lock.get_read(); ::SnapContext snapc = m_ictx->snapc; @@ -78,16 +85,17 @@ namespace librbd { comp->release(); } - void CopyupRequest::read_from_parent(vector >& image_extents) + void CopyupRequest::read_from_parent() { AioCompletion *comp = aio_create_completion_internal( this, &CopyupRequest::read_from_parent_cb); - ldout(m_ictx->cct, 20) << __func__ << " this = " << this - << " parent completion " << comp - << " extents " << image_extents + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": completion " << comp + << ", oid " << m_oid + << ", extents " << m_image_extents << dendl; - int r = aio_read(m_ictx->parent, image_extents, NULL, &m_copyup_data, + int r = aio_read(m_ictx->parent, m_image_extents, NULL, &m_copyup_data, comp, 0); if (r < 0) { comp->release(); @@ -95,11 +103,14 @@ namespace librbd { } } - void CopyupRequest::queue_read_from_parent(vector >& image_extents) + void CopyupRequest::queue_read_from_parent() { // TODO: once the ObjectCacher allows reentrant read requests, the finisher // should be eliminated - C_ReadFromParent *ctx = new C_ReadFromParent(this, image_extents); + ldout(m_ictx->cct, 20) << __func__ << " " << this + << ": oid " << m_oid << " " + << ", extents " << m_image_extents << dendl; + C_ReadFromParent *ctx = new C_ReadFromParent(this); m_ictx->copyup_finisher->queue(ctx); } @@ -108,19 +119,21 @@ namespace librbd { CopyupRequest *req = reinterpret_cast(arg); AioCompletion *comp = reinterpret_cast(cb); - ldout(req->m_ictx->cct, 20) << __func__ << dendl; - req->complete_all(comp->get_return_value()); + ldout(req->m_ictx->cct, 20) << __func__ << " " << req + << ": oid " << req->m_oid + << ", extents " << req->m_image_extents << dendl; // If this entry is created by a read request, then copyup operation will // be performed asynchronously. Perform cleaning up from copyup callback. // If this entry is created by a write request, then copyup operation will // be performed synchronously by AioWrite. After extracting data, perform // cleaning up here - if (req->m_send_copyup) { + Mutex::Locker l(req->m_ictx->copyup_list_lock); + if (req->m_pending_requests.empty()) { req->send_copyup(comp->get_return_value()); + } else { + req->complete_all(comp->get_return_value()); } - - Mutex::Locker l(req->m_ictx->copyup_list_lock); delete req; } } diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h index d251fa8ee60..107189f1f69 100644 --- a/src/librbd/CopyupRequest.h +++ b/src/librbd/CopyupRequest.h @@ -16,33 +16,31 @@ namespace librbd { class CopyupRequest { public: CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, - bool send_copyup); + vector >& image_extents); ~CopyupRequest(); ceph::bufferlist& get_copyup_data(); void append_request(AioRequest *req); - void read_from_parent(vector >& image_extents); - void queue_read_from_parent(vector >& image_extents); + void read_from_parent(); + void queue_read_from_parent(); private: class C_ReadFromParent : public Context { public: - C_ReadFromParent(CopyupRequest *c, vector > i) - : m_req(c), m_image_extents(i) {} + C_ReadFromParent(CopyupRequest *c) : m_req(c) {} virtual void finish(int r) { - m_req->read_from_parent(m_image_extents); + m_req->read_from_parent(); } private: CopyupRequest *m_req; - vector > m_image_extents; }; ImageCtx *m_ictx; std::string m_oid; uint64_t m_object_no; - bool m_send_copyup; + vector > m_image_extents; ceph::bufferlist m_copyup_data; vector m_pending_requests; diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 3d977f3eb97..7550f15d63d 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -1623,12 +1623,15 @@ reprotect_and_return_err: } RWLock::WLocker l2(ictx->md_lock); - if (size < ictx->size && ictx->object_cacher) { - // need to invalidate since we're deleting objects, and - // ObjectCacher doesn't track non-existent objects - r = ictx->invalidate_cache(); - if (r < 0) { - return r; + if (size < ictx->size) { + ictx->wait_for_pending_copyup(); + if (ictx->object_cacher) { + // need to invalidate since we're deleting objects, and + // ObjectCacher doesn't track non-existent objects + r = ictx->invalidate_cache(); + if (r < 0) { + return r; + } } } resize_helper(ictx, size, prog_ctx); @@ -2205,10 +2208,10 @@ reprotect_and_return_err: ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = " << (snap_name ? snap_name : "NULL") << dendl; - ictx->wait_for_pending_copyup(); // ignore return value, since we may be set to a non-existent // snapshot and the user is trying to fix that ictx_check(ictx); + ictx->wait_for_pending_copyup(); if (ictx->image_watcher != NULL) { ictx->image_watcher->flush_aio_operations(); }