From: Jason Dillaman Date: Thu, 19 Mar 2015 19:33:07 +0000 (-0400) Subject: librbd: added RebuildObjectMapRequest state machine X-Git-Tag: v9.0.1~149^2~16 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c0cd3829e026a9d90953db5c892f72772381d533;p=ceph.git librbd: added RebuildObjectMapRequest state machine It will verify the object map is properly sized, verify the existence of each object within the image (snapshot), and clear the invalid object map flag once complete. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am index 96d0a00ce30c..2a8133f18999 100644 --- a/src/librbd/Makefile.am +++ b/src/librbd/Makefile.am @@ -20,7 +20,8 @@ librbd_internal_la_SOURCES = \ librbd/ImageWatcher.cc \ librbd/internal.cc \ librbd/LibrbdWriteback.cc \ - librbd/ObjectMap.cc + librbd/ObjectMap.cc \ + librbd/RebuildObjectMapRequest.cc noinst_LTLIBRARIES += librbd_internal.la librbd_api_la_SOURCES = \ @@ -64,6 +65,7 @@ noinst_HEADERS += \ librbd/LibrbdWriteback.h \ librbd/ObjectMap.h \ librbd/parent_types.h \ + librbd/RebuildObjectMapRequest.h \ librbd/SnapInfo.h \ librbd/TaskFinisher.h \ librbd/WatchNotifyTypes.h diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc index 243a61ea264d..f32ad7cf48be 100644 --- a/src/librbd/ObjectMap.cc +++ b/src/librbd/ObjectMap.cc @@ -33,6 +33,13 @@ std::string ObjectMap::object_map_name(const std::string &image_id, return oid; } +uint8_t ObjectMap::operator[](uint64_t object_no) const +{ + assert(m_image_ctx.object_map_lock.is_locked()); + assert(object_no < m_object_map.size()); + return m_object_map[object_no]; +} + bool ObjectMap::enabled() const { RWLock::RLocker l(m_image_ctx.object_map_lock); @@ -135,10 +142,8 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const if (!m_enabled) { return true; } - assert(object_no < m_object_map.size()); - - bool exists = (m_object_map[object_no] == OBJECT_EXISTS || - m_object_map[object_no] == OBJECT_PENDING); + uint8_t state = (*this)[object_no]; + bool exists = (state == OBJECT_EXISTS || state == OBJECT_PENDING); ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: " << "object_no=" << object_no << " r=" << exists << dendl; @@ -321,7 +326,7 @@ bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no, RWLock::WLocker l(m_image_ctx.object_map_lock); assert(start_object_no < end_object_no); - + CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << &m_image_ctx << " aio_update: start=" << start_object_no << ", end=" << end_object_no << ", new_state=" diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h index 95bb85227585..653bba553086 100644 --- a/src/librbd/ObjectMap.h +++ b/src/librbd/ObjectMap.h @@ -27,6 +27,11 @@ public: static std::string object_map_name(const std::string &image_id, uint64_t snap_id); + uint8_t operator[](uint64_t object_no) const; + inline uint64_t size() const { + return m_object_map.size(); + } + int lock(); int unlock(); diff --git a/src/librbd/RebuildObjectMapRequest.cc b/src/librbd/RebuildObjectMapRequest.cc new file mode 100644 index 000000000000..2ff8aff9f68f --- /dev/null +++ b/src/librbd/RebuildObjectMapRequest.cc @@ -0,0 +1,367 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/RebuildObjectMapRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/AsyncResizeRequest.h" +#include "librbd/AsyncTrimRequest.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageWatcher.h" +#include "librbd/internal.h" +#include "librbd/ObjectMap.h" +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: " + +namespace librbd { + +namespace { + +class C_VerifyObject : public C_AsyncObjectThrottle { +public: + C_VerifyObject(AsyncObjectThrottle &throttle, ImageCtx *image_ctx, + uint64_t snap_id, uint64_t object_no) + : C_AsyncObjectThrottle(throttle), m_image_ctx(*image_ctx), + m_snap_id(snap_id), m_object_no(object_no), + m_oid(m_image_ctx.get_object_name(m_object_no)) + { + } + + virtual void complete(int r) { + if (should_complete(r)) { + ldout(m_image_ctx.cct, 5) << " C_VerifyObject completed " << dendl; + finish(r); + delete this; + } + } + + virtual int send() { + send_assert_exists(); + return 0; + } + +private: + /** + * Verifying the object map for a single object follows the following state + * machine: + * + * + * | + * v + * STATE_ASSERT_EXISTS --------> STATE_UPDATE_OBJECT_MAP + * . | + * . v + * . . . . . . . . . . . . . . . . > + * + * The _UPDATE_OBJECT_MAP state is skipped if the object map does not + * need to be updated. + */ + enum State { + STATE_ASSERT_EXISTS, + STATE_UPDATE_OBJECT_MAP + }; + + + ImageCtx &m_image_ctx; + uint64_t m_snap_id; + uint64_t m_object_no; + std::string m_oid; + + State m_state; + + bool should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << m_oid << " C_VerifyObject::should_complete: " << " r=" << r + << dendl; + + bool finished = false; + switch (m_state) { + case STATE_ASSERT_EXISTS: + ldout(cct, 5) << "ASSERT_EXISTS" << dendl; + if (r == 0 || r == -ENOENT) { + return send_update_object_map(r == 0); + } + break; + + case STATE_UPDATE_OBJECT_MAP: + ldout(cct, 5) << "UPDATE_OBJECT_MAP" << dendl; + finished = true; + break; + + default: + assert(false); + break; + } + + if (r < 0) { + lderr(cct) << "encountered an error: " << cpp_strerror(r) << dendl; + return true; + } + return finished; + } + + void send_assert_exists() { + ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::assert_exists" + << dendl; + + m_state = STATE_ASSERT_EXISTS; + librados::AioCompletion *comp = librados::Rados::aio_create_completion( + this, NULL, rados_ctx_cb); + + librados::ObjectReadOperation op; + op.assert_exists(); + int r = m_image_ctx.data_ctx.aio_operate(m_oid, comp, &op, NULL); + assert(r == 0); + } + + bool send_update_object_map(bool exists) { + CephContext *cct = m_image_ctx.cct; + bool lost_exclusive_lock = false; + + { + RWLock::RLocker l(m_image_ctx.owner_lock); + if (m_image_ctx.image_watcher->is_lock_supported() && + !m_image_ctx.image_watcher->is_lock_owner()) { + ldout(cct, 1) << m_oid << " lost exclusive lock during verify" << dendl; + lost_exclusive_lock = true; + } else { + RWLock::WLocker l(m_image_ctx.object_map_lock); + uint8_t state = m_image_ctx.object_map[m_object_no]; + uint8_t new_state = state; + if (exists && state == OBJECT_NONEXISTENT) { + new_state = OBJECT_EXISTS; + } else if (!exists && state != OBJECT_NONEXISTENT) { + new_state = OBJECT_NONEXISTENT; + } + + if (new_state != state) { + ldout(cct, 5) << m_oid << " C_VerifyObject::send_update_object_map" + << dendl; + bool updating = m_image_ctx.object_map.aio_update(m_object_no, + new_state, state, + this); + assert(updating); + return false; + } + } + } + + if (lost_exclusive_lock) { + complete(-ERESTART); + return false; + } + return true; + } +}; + +} // anonymous namespace + + +void RebuildObjectMapRequest::send() { + send_resize_object_map(); +} + +bool RebuildObjectMapRequest::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; + + switch (m_state) { + case STATE_RESIZE_OBJECT_MAP: + ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl; + if (r == -ESTALE && !m_attempted_trim) { + // objects are still flagged as in-use -- delete them + m_attempted_trim = true; + send_trim_image(); + return false; + } else if (r == 0) { + send_verify_objects(); + } + break; + + case STATE_TRIM_IMAGE: + ldout(cct, 5) << "TRIM_IMAGE" << dendl; + if (r == 0) { + send_resize_object_map(); + } + break; + + case STATE_VERIFY_OBJECTS: + ldout(cct, 5) << "VERIFY_OBJECTS" << dendl; + if (r == 0) { + return send_update_header(); + } + break; + + case STATE_UPDATE_HEADER: + ldout(cct, 5) << "UPDATE_HEADER" << dendl; + if (r == 0) { + return true; + } + break; + + default: + assert(false); + break; + } + + if (r < 0) { + lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r) + << dendl; + return true; + } + return false; +} + +void RebuildObjectMapRequest::send_resize_object_map() { + CephContext *cct = m_image_ctx.cct; + bool lost_exclusive_lock = false; + bool skip_resize = true; + + m_state = STATE_RESIZE_OBJECT_MAP; + { + RWLock::RLocker l(m_image_ctx.owner_lock); + if (m_image_ctx.image_watcher->is_lock_supported() && + !m_image_ctx.image_watcher->is_lock_owner()) { + ldout(cct, 1) << "lost exclusive lock during resize" << dendl; + lost_exclusive_lock = true; + } else { + RWLock::RLocker l(m_image_ctx.snap_lock); + uint64_t size = get_image_size(); + uint64_t num_objects = Striper::get_num_objects(m_image_ctx.layout, size); + if (m_image_ctx.object_map.size() != num_objects) { + ldout(cct, 5) << this << " send_resize_object_map" << dendl; + + m_image_ctx.object_map.aio_resize(num_objects, OBJECT_NONEXISTENT, + create_callback_context()); + skip_resize = false; + } + } + } + + if (lost_exclusive_lock) { + complete(-ERESTART); + } else if (skip_resize) { + send_verify_objects(); + } +} + +void RebuildObjectMapRequest::send_trim_image() { + CephContext *cct = m_image_ctx.cct; + bool lost_exclusive_lock = false; + bool skip_trim = true; + + m_state = STATE_TRIM_IMAGE; + { + RWLock::RLocker l(m_image_ctx.owner_lock); + if (m_image_ctx.image_watcher->is_lock_supported() && + !m_image_ctx.image_watcher->is_lock_owner()) { + ldout(cct, 1) << "lost exclusive lock during trim" << dendl; + lost_exclusive_lock = true; + } else { + ldout(cct, 5) << this << " send_trim_image" << dendl; + + uint64_t new_size; + uint64_t orig_size; + { + RWLock::RLocker l(m_image_ctx.snap_lock); + new_size = get_image_size(); + orig_size = m_image_ctx.get_object_size() * + m_image_ctx.object_map.size(); + } + AsyncTrimRequest *req = new AsyncTrimRequest(m_image_ctx, + create_callback_context(), + orig_size, new_size, + m_prog_ctx); + req->send(); + skip_trim = false; + } + } + + if (lost_exclusive_lock) { + complete(-ERESTART); + } else if (skip_trim) { + send_resize_object_map(); + } +} + +void RebuildObjectMapRequest::send_verify_objects() { + CephContext *cct = m_image_ctx.cct; + + m_state = STATE_VERIFY_OBJECTS; + ldout(cct, 5) << this << " send_verify_objects" << dendl; + + uint64_t snap_id; + uint64_t num_objects; + { + RWLock::RLocker l(m_image_ctx.snap_lock); + snap_id = m_image_ctx.snap_id; + num_objects = Striper::get_num_objects(m_image_ctx.layout, + m_image_ctx.get_image_size(snap_id)); + } + + AsyncObjectThrottle::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr(), + boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2)); + AsyncObjectThrottle *throttle = new AsyncObjectThrottle( + *this, context_factory, create_callback_context(), m_prog_ctx, 0, + num_objects); + throttle->start_ops(cct->_conf->rbd_concurrent_management_ops); +} + +bool RebuildObjectMapRequest::send_update_header() { + CephContext *cct = m_image_ctx.cct; + bool lost_exclusive_lock = false; + + m_state = STATE_UPDATE_HEADER; + { + RWLock::RLocker l(m_image_ctx.owner_lock); + if (m_image_ctx.image_watcher->is_lock_supported() && + !m_image_ctx.image_watcher->is_lock_owner()) { + ldout(cct, 1) << "lost exclusive lock during header update" << dendl; + lost_exclusive_lock = true; + } else { + ldout(cct, 5) << this << " send_update_header" << dendl; + + librados::ObjectWriteOperation op; + if (m_image_ctx.image_watcher->is_lock_supported()) { + m_image_ctx.image_watcher->assert_header_locked(&op); + } + cls_client::set_flags(&op, m_image_ctx.snap_id, 0, + RBD_FLAG_OBJECT_MAP_INVALID); + + int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, + create_callback_completion(), &op); + assert(r == 0); + + RWLock::WLocker snap_locker(m_image_ctx.snap_lock); + m_image_ctx.update_flags(m_image_ctx.snap_id, RBD_FLAG_OBJECT_MAP_INVALID, + false); + return false; + } + } + + if (lost_exclusive_lock) { + complete(-ERESTART); + return false; + } + return true; +} + +uint64_t RebuildObjectMapRequest::get_image_size() const { + assert(m_image_ctx.snap_lock.is_locked()); + if (m_image_ctx.snap_id == CEPH_NOSNAP) { + if (!m_image_ctx.async_resize_reqs.empty()) { + return m_image_ctx.async_resize_reqs.front()->get_image_size(); + } else { + return m_image_ctx.size; + } + } + return m_image_ctx.get_image_size(m_image_ctx.snap_id); +} + +} // namespace librbd diff --git a/src/librbd/RebuildObjectMapRequest.h b/src/librbd/RebuildObjectMapRequest.h new file mode 100644 index 000000000000..4c5b96973b65 --- /dev/null +++ b/src/librbd/RebuildObjectMapRequest.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H +#define CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H + +#include "include/int_types.h" +#include "librbd/AsyncRequest.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +class RebuildObjectMapRequest : public AsyncRequest { +public: + + RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish, + ProgressContext &prog_ctx) + : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx), + m_prog_ctx(prog_ctx), m_attempted_trim(false) + { + } + + virtual void send(); + +protected: + virtual bool should_complete(int r); + +private: + /** + * Rebuild object map goes through the following state machine to + * verify per-object state: + * + * + * . | . . . . . . . . . . + * . | . . + * . v v . + * . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE + * . | + * . v + * . . . > STATE_VERIFY_OBJECTS + * | + * v + * STATE_UPDATE_HEADER + * + * The _RESIZE_OBJECT_MAP state will be skipped if the object map + * is appropriately sized for the image. The _TRIM_IMAGE state will + * only be hit if the resize failed due to an in-use object. + */ + enum State { + STATE_RESIZE_OBJECT_MAP, + STATE_TRIM_IMAGE, + STATE_VERIFY_OBJECTS, + STATE_UPDATE_HEADER + }; + + ImageCtx &m_image_ctx; + ProgressContext &m_prog_ctx; + State m_state; + bool m_attempted_trim; + + void send_resize_object_map(); + void send_trim_image(); + void send_verify_objects(); + bool send_update_header(); + + uint64_t get_image_size() const; + +}; + +} // namespace librbd + +#endif // CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H