From: Jason Dillaman Date: Mon, 2 Feb 2015 20:08:42 +0000 (-0500) Subject: librbd: fixed object map issues discovered via fsx X-Git-Tag: v0.93~117 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9e9356b1d8a49ec03d2bc8df5ad08d894c10eabe;p=ceph.git librbd: fixed object map issues discovered via fsx The object map wasn't being properly refreshed after setting the snapshot context on the parent image. Additionally fixed a potential deadlock that could have occurred if no object map update was required when trimming an image. Fixes: #10706 Signed-off-by: Jason Dillaman Reviewed-by: Josh Durgin --- diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index c4948f81ee46..35ba2e6bd896 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -438,8 +438,11 @@ namespace librbd { m_state = LIBRBD_AIO_WRITE_PRE; FunctionContext *ctx = new FunctionContext( boost::bind(&AioRequest::complete, this, _1)); - m_ictx->object_map->aio_update(m_object_no, new_state, - current_state, ctx); + if (!m_ictx->object_map->aio_update(m_object_no, new_state, + current_state, ctx)) { + // no object map update required + return false; + } } } @@ -469,8 +472,11 @@ namespace librbd { m_state = LIBRBD_AIO_WRITE_POST; FunctionContext *ctx = new FunctionContext( boost::bind(&AioRequest::complete, this, _1)); - m_ictx->object_map->aio_update(m_object_no, OBJECT_NONEXISTENT, - OBJECT_PENDING, ctx); + if (!m_ictx->object_map->aio_update(m_object_no, OBJECT_NONEXISTENT, + OBJECT_PENDING, ctx)) { + // no object map update required + return true; + } return false; } diff --git a/src/librbd/AsyncResizeRequest.cc b/src/librbd/AsyncResizeRequest.cc index d0e330775abd..af2a3cdf8dcc 100644 --- a/src/librbd/AsyncResizeRequest.cc +++ b/src/librbd/AsyncResizeRequest.cc @@ -33,7 +33,7 @@ bool AsyncResizeRequest::should_complete(int r) switch (m_state) { case STATE_TRIM_IMAGE: ldout(cct, 5) << "TRIM_IMAGE" << dendl; - send_grow_object_map(); + send_update_header(); break; case STATE_GROW_OBJECT_MAP: @@ -127,10 +127,10 @@ void AsyncResizeRequest::send_grow_object_map() { } } + // avoid possible recursive lock attempts if (!object_map_enabled) { send_update_header(); } else if (lost_exclusive_lock) { - // only complete when not holding locks complete(-ERESTART); } } @@ -160,8 +160,8 @@ bool AsyncResizeRequest::send_shrink_object_map() { } } + // avoid possible recursive lock attempts if (lost_exclusive_lock) { - // only complete when not holding locks complete(-ERESTART); } return false; @@ -207,8 +207,8 @@ void AsyncResizeRequest::send_update_header() { } } + // avoid possible recursive lock attempts if (lost_exclusive_lock) { - // only complete when not holding locks complete(-ERESTART); } } diff --git a/src/librbd/AsyncTrimRequest.cc b/src/librbd/AsyncTrimRequest.cc index 610d2d011a1e..3bbab5525258 100644 --- a/src/librbd/AsyncTrimRequest.cc +++ b/src/librbd/AsyncTrimRequest.cc @@ -159,61 +159,75 @@ void AsyncTrimRequest::send_remove_objects() { } void AsyncTrimRequest::send_pre_remove() { + bool remove_objects = false; bool lost_exclusive_lock = false; { RWLock::RLocker l(m_image_ctx.owner_lock); RWLock::RLocker l2(m_image_ctx.md_lock); if (m_image_ctx.object_map == NULL) { - send_remove_objects(); - return; - } - - ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: " - << " delete_start=" << m_delete_start - << " num_objects=" << m_num_objects << dendl; - m_state = STATE_PRE_REMOVE; - - if (!m_image_ctx.image_watcher->is_lock_owner()) { - ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl; - lost_exclusive_lock = true; + remove_objects = true; } else { - // flag the objects as pending deletion - m_image_ctx.object_map->aio_update( - m_delete_start, m_num_objects, OBJECT_PENDING, OBJECT_EXISTS, - create_callback_context()); + ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: " + << " delete_start=" << m_delete_start + << " num_objects=" << m_num_objects << dendl; + m_state = STATE_PRE_REMOVE; + + if (!m_image_ctx.image_watcher->is_lock_owner()) { + ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl; + lost_exclusive_lock = true; + } else { + // flag the objects as pending deletion + if (!m_image_ctx.object_map->aio_update(m_delete_start, m_num_objects, + OBJECT_PENDING, OBJECT_EXISTS, + create_callback_context())) { + remove_objects = true; + } + } } } - if (lost_exclusive_lock) { + // avoid possible recursive lock attempts + if (remove_objects) { + // no object map update required + send_remove_objects(); + } else if (lost_exclusive_lock) { complete(-ERESTART); } } bool AsyncTrimRequest::send_post_remove() { + bool clean_boundary = false; bool lost_exclusive_lock = false; { RWLock::RLocker l(m_image_ctx.owner_lock); RWLock::RLocker l2(m_image_ctx.md_lock); if (m_image_ctx.object_map == NULL) { - return send_clean_boundary(); - } - - ldout(m_image_ctx.cct, 5) << this << " send_post_remove: " - << " delete_start=" << m_delete_start - << " num_objects=" << m_num_objects << dendl; - m_state = STATE_POST_REMOVE; - - if (!m_image_ctx.image_watcher->is_lock_owner()) { - ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl; + clean_boundary = true; } else { - // flag the pending objects as removed - m_image_ctx.object_map->aio_update( - m_delete_start, m_num_objects, OBJECT_NONEXISTENT, OBJECT_PENDING, - create_callback_context()); + ldout(m_image_ctx.cct, 5) << this << " send_post_remove: " + << " delete_start=" << m_delete_start + << " num_objects=" << m_num_objects << dendl; + m_state = STATE_POST_REMOVE; + + if (!m_image_ctx.image_watcher->is_lock_owner()) { + ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl; + } else { + // flag the pending objects as removed + if (!m_image_ctx.object_map->aio_update(m_delete_start, m_num_objects, + OBJECT_NONEXISTENT, + OBJECT_PENDING, + create_callback_context())) { + clean_boundary = true; + } + } } } - if (lost_exclusive_lock) { + // avoid possible recursive lock attempts + if (clean_boundary) { + // no object map update required + return send_clean_boundary(); + } else if (lost_exclusive_lock) { complete(-ERESTART); } return false; @@ -287,6 +301,7 @@ bool AsyncTrimRequest::send_clean_boundary() { } + // avoid possible recursive lock attempts if (lost_exclusive_lock) { complete(-ERESTART); } else if (completion != NULL) { diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc index 0a6df6a60c3d..fa4fe934ba8b 100644 --- a/src/librbd/CopyupRequest.cc +++ b/src/librbd/CopyupRequest.cc @@ -171,28 +171,32 @@ namespace librbd { } bool CopyupRequest::send_object_map() { - bool object_map_enabled = true; + bool copyup = false; { RWLock::RLocker l(m_ictx->owner_lock); RWLock::RLocker l2(m_ictx->md_lock); if (m_ictx->object_map == NULL) { - object_map_enabled = false; + copyup = true; } else if (!m_ictx->image_watcher->is_lock_owner()) { ldout(m_ictx->cct, 20) << "exclusive lock not held for copy-on-read" << dendl; return true; } else { m_state = STATE_OBJECT_MAP; - m_ictx->object_map->aio_update(m_object_no, OBJECT_EXISTS, - boost::optional(), - create_callback_context()); + if (!m_ictx->object_map->aio_update(m_object_no, OBJECT_EXISTS, + boost::optional(), + create_callback_context())) { + copyup = true; + } } } - if (!object_map_enabled) { + // avoid possible recursive lock attempts + if (copyup) { + // no object map update required send_copyup(); return true; - } + } return false; } diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index f8649e28e611..398b49b69dbb 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -298,6 +298,10 @@ namespace librbd { snap_name = in_snap_name; snap_exists = true; data_ctx.snap_set_read(snap_id); + + if (object_map != NULL) { + object_map->refresh(); + } return 0; } return -ENOENT; @@ -309,6 +313,10 @@ namespace librbd { snap_name = ""; snap_exists = true; data_ctx.snap_set_read(snap_id); + + if (object_map != NULL) { + object_map->refresh(); + } } snap_t ImageCtx::get_snap_id(string in_snap_name) const diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc index e4c736e2b939..f179fa833029 100644 --- a/src/librbd/ObjectMap.cc +++ b/src/librbd/ObjectMap.cc @@ -6,6 +6,7 @@ #include "librbd/internal.h" #include "common/dout.h" #include "common/errno.h" +#include "include/stringify.h" #include "cls/lock/cls_lock_client.h" #define dout_subsys ceph_subsys_rbd @@ -29,7 +30,7 @@ int ObjectMap::lock() bool broke_lock = false; CephContext *cct = m_image_ctx.cct; while (true) { - ldout(cct, 10) << "locking object map" << dendl; + ldout(cct, 10) << &m_image_ctx << " locking object map" << dendl; r = rados::cls::lock::lock(&m_image_ctx.md_ctx, object_map_name(m_image_ctx.id), RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "", "", @@ -86,6 +87,8 @@ int ObjectMap::unlock() return 0; } + ldout(m_image_ctx.cct, 10) << &m_image_ctx << " unlocking object map" + << dendl; int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx, object_map_name(m_image_ctx.id), RBD_LOCK_NAME, ""); @@ -106,8 +109,13 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const RWLock::RLocker l(m_image_ctx.object_map_lock); assert(object_no < object_map.size()); - return (object_map[object_no] == OBJECT_EXISTS || - object_map[object_no] == OBJECT_PENDING); + + bool exists = (object_map[object_no] == OBJECT_EXISTS || + object_map[object_no] == OBJECT_PENDING); + ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: " + << "object_no=" << object_no << " r=" << exists + << dendl; + return exists; } int ObjectMap::refresh() @@ -115,9 +123,11 @@ int ObjectMap::refresh() if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { return 0; } - - RWLock::WLocker l(m_image_ctx.object_map_lock); + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << &m_image_ctx << " refreshing object map" << dendl; + + RWLock::WLocker l(m_image_ctx.object_map_lock); int r = cls_client::object_map_load(&m_image_ctx.data_ctx, object_map_name(m_image_ctx.id), &object_map); @@ -156,14 +166,15 @@ void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state, req->send(); } -void ObjectMap::aio_update(uint64_t object_no, uint8_t new_state, +bool ObjectMap::aio_update(uint64_t object_no, uint8_t new_state, const boost::optional ¤t_state, Context *on_finish) { - aio_update(object_no, object_no + 1, new_state, current_state, on_finish); + return aio_update(object_no, object_no + 1, new_state, current_state, + on_finish); } -void ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no, +bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no, uint8_t new_state, const boost::optional ¤t_state, Context *on_finish) @@ -172,42 +183,32 @@ void ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no, assert(m_image_ctx.owner_lock.is_locked()); assert(m_image_ctx.image_watcher->is_lock_owner()); - bool update_required = false; - { - RWLock::WLocker l(m_image_ctx.object_map_lock); - assert(start_object_no < end_object_no); + RWLock::WLocker l(m_image_ctx.object_map_lock); + assert(start_object_no < end_object_no); - CephContext *cct = m_image_ctx.cct; - if (end_object_no > object_map.size()) { - ldout(cct, 20) << "skipping update of invalid object map" << dendl; - return; - } + CephContext *cct = m_image_ctx.cct; + if (end_object_no > object_map.size()) { + ldout(cct, 20) << "skipping update of invalid object map" << dendl; + return false; + } - for (uint64_t object_no = start_object_no; object_no < end_object_no; - ++object_no) { - if ((!current_state || object_map[object_no] == *current_state) && - object_map[object_no] != new_state) { - update_required = true; - break; - } - } - - if (update_required) { + for (uint64_t object_no = start_object_no; object_no < end_object_no; + ++object_no) { + if ((!current_state || object_map[object_no] == *current_state) && + object_map[object_no] != new_state) { UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no, end_object_no, new_state, current_state, on_finish); req->send(); + return true; } } - - if (!update_required) { - on_finish->complete(0); - } + return false; } void ObjectMap::invalidate() { CephContext *cct = m_image_ctx.cct; - lderr(cct) << this << " invalidating object map" << dendl; + lderr(cct) << &m_image_ctx << " invalidating object map" << dendl; m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID; librados::ObjectWriteOperation op; @@ -222,7 +223,7 @@ void ObjectMap::invalidate() { bool ObjectMap::Request::should_complete(int r) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 20) << this << " should_complete: r=" << r << dendl; + ldout(cct, 20) << &m_image_ctx << " should_complete: r=" << r << dendl; switch (m_state) { @@ -252,9 +253,8 @@ bool ObjectMap::Request::should_complete(int r) { if (r < 0) { lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r) << dendl; - return true; } - break; + return true; default: lderr(cct) << "invalid state: " << m_state << dendl; @@ -268,7 +268,7 @@ void ObjectMap::Request::invalidate() { CephContext *cct = m_image_ctx.cct; RWLock::WLocker l(m_image_ctx.md_lock); - lderr(cct) << this << " invalidating object map" << dendl; + lderr(cct) << &m_image_ctx << " invalidating object map" << dendl; m_state = STATE_INVALIDATE; m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID; @@ -288,7 +288,8 @@ void ObjectMap::ResizeRequest::send() { RWLock::WLocker l(m_image_ctx.object_map_lock); m_num_objs = Striper::get_num_objects(m_image_ctx.layout, m_new_size); - ldout(cct, 5) << this << " resizing on-disk object map: " << m_num_objs << dendl; + ldout(cct, 5) << &m_image_ctx << " resizing on-disk object map: " + << m_num_objs << dendl; librados::ObjectWriteOperation op; rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", ""); @@ -304,7 +305,8 @@ void ObjectMap::ResizeRequest::send() { void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 5) << this << " resizing in-memory object map: " << m_num_objs << dendl; + ldout(cct, 5) << &m_image_ctx << " resizing in-memory object map: " + << m_num_objs << dendl; size_t orig_object_map_size = object_map->object_map.size(); object_map->object_map.resize(m_num_objs); for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) { @@ -315,9 +317,11 @@ void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) { void ObjectMap::UpdateRequest::send() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 20) << this << " updating on-disk object map: [" + ldout(cct, 20) << &m_image_ctx << " updating on-disk object map: [" << m_start_object_no << "," << m_end_object_no << ") = " - << static_cast(m_new_state) << dendl; + << (m_current_state ? stringify(*m_current_state) : "") + << "->" << static_cast(m_new_state) + << dendl; librados::ObjectWriteOperation op; rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", ""); @@ -334,7 +338,7 @@ void ObjectMap::UpdateRequest::send() { void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 20) << this << " updating in-memory object map" << dendl; + ldout(cct, 20) << &m_image_ctx << " updating in-memory object map" << dendl; for (uint64_t object_no = m_start_object_no; object_no < MIN(m_end_object_no, object_map->object_map.size()); ++object_no) { diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h index 648043d38f96..29eac9730af9 100644 --- a/src/librbd/ObjectMap.h +++ b/src/librbd/ObjectMap.h @@ -31,10 +31,10 @@ public: void aio_resize(uint64_t new_size, uint8_t default_object_state, Context *on_finish); - void aio_update(uint64_t object_no, uint8_t new_state, - const boost::optional ¤t_state, - Context *on_finish); - void aio_update(uint64_t start_object_no, uint64_t end_object_no, + bool aio_update(uint64_t object_no, uint8_t new_state, + const boost::optional ¤t_state, + Context *on_finish); + bool aio_update(uint64_t start_object_no, uint64_t end_object_no, uint8_t new_state, const boost::optional ¤t_state, Context *on_finish); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index f75c14cbb94c..285f0551eebe 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2281,10 +2281,6 @@ reprotect_and_return_err: if (r < 0) { return r; } - - if (ictx->object_map != NULL) { - ictx->object_map->refresh(); - } refresh_parent(ictx); return 0; } diff --git a/src/test/pybind/test_rbd.py b/src/test/pybind/test_rbd.py index 4d3cf9d4cfbb..985bcf0c408b 100644 --- a/src/test/pybind/test_rbd.py +++ b/src/test/pybind/test_rbd.py @@ -741,6 +741,7 @@ class TestClone(object): def test_resize_io(self): parent_data = self.image.read(IMG_SIZE / 2, 256) + self.image.resize(0) self.clone.resize(IMG_SIZE / 2 + 128) child_data = self.clone.read(IMG_SIZE / 2, 128) eq(child_data, parent_data[:128])