From: Jason Dillaman Date: Wed, 26 Aug 2015 18:57:16 +0000 (-0400) Subject: librbd: move all image operation state machines to new namespace X-Git-Tag: v10.0.2~193^2~32 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ff924278da701f7c493eda57715afd69c16dd127;p=ceph.git librbd: move all image operation state machines to new namespace Prior to creating multiple new state machines for image operations, move all existing op requests to a new namespace and folder. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/FlattenRequest.cc b/src/librbd/FlattenRequest.cc deleted file mode 100644 index 5b3e4a2e01f..00000000000 --- a/src/librbd/FlattenRequest.cc +++ /dev/null @@ -1,176 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "librbd/FlattenRequest.h" -#include "librbd/AioObjectRequest.h" -#include "librbd/AsyncObjectThrottle.h" -#include "librbd/ImageCtx.h" -#include "librbd/ImageWatcher.h" -#include "librbd/ObjectMap.h" -#include "common/dout.h" -#include "common/errno.h" -#include -#include - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::FlattenRequest: " - -namespace librbd { - -class C_FlattenObject : public C_AsyncObjectThrottle<> { -public: - C_FlattenObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, - uint64_t object_size, ::SnapContext snapc, uint64_t object_no) - : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_size(object_size), - m_snapc(snapc), m_object_no(object_no) - { - } - - virtual int send() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - - if (m_image_ctx.image_watcher->is_lock_supported() && - !m_image_ctx.image_watcher->is_lock_owner()) { - ldout(cct, 1) << "lost exclusive lock during flatten" << dendl; - return -ERESTART; - } - - bufferlist bl; - string oid = m_image_ctx.get_object_name(m_object_no); - AioObjectWrite *req = new 
AioObjectWrite(&m_image_ctx, oid, m_object_no, 0, - bl, m_snapc, this); - if (!req->has_parent()) { - // stop early if the parent went away - it just means - // another flatten finished first or the image was resized - delete req; - return 1; - } - - req->send(); - return 0; - } - -private: - uint64_t m_object_size; - ::SnapContext m_snapc; - uint64_t m_object_no; -}; - -bool FlattenRequest::should_complete(int r) { - CephContext *cct = m_image_ctx.cct; - ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; - if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) { - lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl; - return true; - } - - RWLock::RLocker owner_locker(m_image_ctx.owner_lock); - switch (m_state) { - case STATE_FLATTEN_OBJECTS: - ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl; - return send_update_header(); - - case STATE_UPDATE_HEADER: - ldout(cct, 5) << "UPDATE_HEADER" << dendl; - return send_update_children(); - - case STATE_UPDATE_CHILDREN: - ldout(cct, 5) << "UPDATE_CHILDREN" << dendl; - return true; - - default: - lderr(cct) << "invalid state: " << m_state << dendl; - assert(false); - break; - } - return false; -} - -void FlattenRequest::send() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - ldout(cct, 5) << this << " send" << dendl; - - m_state = STATE_FLATTEN_OBJECTS; - AsyncObjectThrottle<>::ContextFactory context_factory( - boost::lambda::bind(boost::lambda::new_ptr(), - boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc, - boost::lambda::_2)); - AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( - this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx, - 0, m_overlap_objects); - throttle->start_ops(m_image_ctx.concurrent_management_ops); -} - -bool FlattenRequest::send_update_header() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - - ldout(cct, 5) << this << " send_update_header" << dendl; - 
m_state = STATE_UPDATE_HEADER; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - { - RWLock::RLocker parent_locker(m_image_ctx.parent_lock); - // stop early if the parent went away - it just means - // another flatten finished first, so this one is useless. - if (!m_image_ctx.parent) { - ldout(cct, 5) << "image already flattened" << dendl; - return true; - } - m_parent_spec = m_image_ctx.parent_md.spec; - } - m_ignore_enoent = true; - - // remove parent from this (base) image - librados::ObjectWriteOperation op; - if (m_image_ctx.image_watcher->is_lock_supported()) { - m_image_ctx.image_watcher->assert_header_locked(&op); - } - cls_client::remove_parent(&op); - - librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, - rados_completion, &op); - assert(r == 0); - rados_completion->release(); - return false; -} - -bool FlattenRequest::send_update_children() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - // if there are no snaps, remove from the children object as well - // (if snapshots remain, they have their own parent info, and the child - // will be removed when the last snap goes away) - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - if ((m_image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 && - !m_image_ctx.snaps.empty()) { - return true; - } - - ldout(cct, 2) << "removing child from children list..." 
<< dendl; - m_state = STATE_UPDATE_CHILDREN; - - librados::ObjectWriteOperation op; - cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id); - - librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion, - &op); - assert(r == 0); - rados_completion->release(); - return false; -} - -} // namespace librbd diff --git a/src/librbd/FlattenRequest.h b/src/librbd/FlattenRequest.h deleted file mode 100644 index 02155f2e314..00000000000 --- a/src/librbd/FlattenRequest.h +++ /dev/null @@ -1,81 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_FLATTEN_REQUEST_H -#define CEPH_LIBRBD_FLATTEN_REQUEST_H - -#include "librbd/AsyncRequest.h" -#include "librbd/parent_types.h" -#include "common/snap_types.h" - -namespace librbd { - -class ImageCtx; -class ProgressContext; - -class FlattenRequest : public AsyncRequest<> -{ -public: - FlattenRequest(ImageCtx &image_ctx, Context *on_finish, - uint64_t object_size, uint64_t overlap_objects, - const ::SnapContext &snapc, ProgressContext &prog_ctx) - : AsyncRequest(image_ctx, on_finish), m_object_size(object_size), - m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx), - m_ignore_enoent(false) - { - } - - virtual void send(); - -protected: - virtual bool should_complete(int r); - -private: - /** - * Flatten goes through the following state machine to copyup objects - * from the parent image: - * - * @verbatim - * - * - * | - * v - * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . . - * . | . - * . | . - * . v . - * . STATE_UPDATE_CHILDREN . - * . | . - * . | . - * . \---> < . . - * . ^ - * . . - * . . . . . . . . . . . . . . . . . . . - * - * @endverbatim - * - * The _UPDATE_CHILDREN state will be skipped if the image has one or - * more snapshots. 
The _UPDATE_HEADER state will be skipped if the - * image was concurrently flattened by another client. - */ - enum State { - STATE_FLATTEN_OBJECTS, - STATE_UPDATE_HEADER, - STATE_UPDATE_CHILDREN - }; - - uint64_t m_object_size; - uint64_t m_overlap_objects; - ::SnapContext m_snapc; - ProgressContext &m_prog_ctx; - State m_state; - - parent_spec m_parent_spec; - bool m_ignore_enoent; - - bool send_update_header(); - bool send_update_children(); -}; - -} // namespace librbd - -#endif // CEPH_LIBRBD_FLATTEN_REQUEST_H diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index d19c7c4515f..2f0bb39c8e1 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -19,7 +19,7 @@ #include "librbd/Journal.h" #include "librbd/LibrbdAdminSocketHook.h" #include "librbd/ObjectMap.h" -#include "librbd/ResizeRequest.h" +#include "librbd/operation/ResizeRequest.h" #include diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 15b476d42b7..d7010e3b913 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -44,7 +44,10 @@ namespace librbd { class LibrbdAdminSocketHook; class ImageWatcher; class Journal; + + namespace operation { class ResizeRequest; + } struct ImageCtx { CephContext *cct; @@ -133,7 +136,7 @@ namespace librbd { atomic_t async_request_seq; - xlist resize_reqs; + xlist resize_reqs; AioImageRequestWQ *aio_work_queue; ContextWQ *op_work_queue; diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am index dab84c61b2f..c8d840e9060 100644 --- a/src/librbd/Makefile.am +++ b/src/librbd/Makefile.am @@ -20,15 +20,15 @@ librbd_internal_la_SOURCES = \ librbd/ImageCtx.cc \ librbd/ImageWatcher.cc \ librbd/internal.cc \ - librbd/FlattenRequest.cc \ librbd/Journal.cc \ librbd/JournalReplay.cc \ librbd/LibrbdAdminSocketHook.cc \ librbd/LibrbdWriteback.cc \ librbd/ObjectMap.cc \ - librbd/RebuildObjectMapRequest.cc \ - librbd/ResizeRequest.cc \ - librbd/TrimRequest.cc + librbd/operation/FlattenRequest.cc \ + 
librbd/operation/RebuildObjectMapRequest.cc \ + librbd/operation/ResizeRequest.cc \ + librbd/operation/TrimRequest.cc noinst_LTLIBRARIES += librbd_internal.la librbd_api_la_SOURCES = \ @@ -66,7 +66,6 @@ noinst_HEADERS += \ librbd/ImageCtx.h \ librbd/ImageWatcher.h \ librbd/internal.h \ - librbd/FlattenRequest.h \ librbd/Journal.h \ librbd/JournalReplay.h \ librbd/JournalTypes.h \ @@ -74,12 +73,13 @@ noinst_HEADERS += \ librbd/LibrbdWriteback.h \ librbd/ObjectMap.h \ librbd/parent_types.h \ - librbd/RebuildObjectMapRequest.h \ - librbd/ResizeRequest.h \ librbd/SnapInfo.h \ librbd/TaskFinisher.h \ - librbd/TrimRequest.h \ - librbd/WatchNotifyTypes.h + librbd/WatchNotifyTypes.h \ + librbd/operation/FlattenRequest.h \ + librbd/operation/RebuildObjectMapRequest.h \ + librbd/operation/ResizeRequest.h \ + librbd/operation/TrimRequest.h endif # WITH_RBD endif # WITH_RADOS diff --git a/src/librbd/RebuildObjectMapRequest.cc b/src/librbd/RebuildObjectMapRequest.cc deleted file mode 100644 index 5d5a8f00b72..00000000000 --- a/src/librbd/RebuildObjectMapRequest.cc +++ /dev/null @@ -1,355 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "librbd/RebuildObjectMapRequest.h" -#include "common/dout.h" -#include "common/errno.h" -#include "librbd/AsyncObjectThrottle.h" -#include "librbd/ImageCtx.h" -#include "librbd/ImageWatcher.h" -#include "librbd/internal.h" -#include "librbd/ObjectMap.h" -#include "librbd/ResizeRequest.h" -#include "librbd/TrimRequest.h" -#include -#include - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: " - -namespace librbd { - -namespace { - -class C_VerifyObject : public C_AsyncObjectThrottle<> { -public: - C_VerifyObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, - uint64_t snap_id, uint64_t object_no) - : C_AsyncObjectThrottle(throttle, *image_ctx), m_snap_id(snap_id), - m_object_no(object_no), 
m_oid(m_image_ctx.get_object_name(m_object_no)) - { - m_io_ctx.dup(m_image_ctx.md_ctx); - m_io_ctx.snap_set_read(CEPH_SNAPDIR); - } - - virtual void complete(int r) { - if (should_complete(r)) { - ldout(m_image_ctx.cct, 20) << m_oid << " C_VerifyObject completed " - << dendl; - finish(r); - delete this; - } - } - - virtual int send() { - send_list_snaps(); - return 0; - } - -private: - librados::IoCtx m_io_ctx; - uint64_t m_snap_id; - uint64_t m_object_no; - std::string m_oid; - - librados::snap_set_t m_snap_set; - int m_snap_list_ret; - - bool should_complete(int r) { - CephContext *cct = m_image_ctx.cct; - if (r == 0) { - r = m_snap_list_ret; - } - if (r < 0 && r != -ENOENT) { - lderr(cct) << m_oid << " C_VerifyObject::should_complete: " - << "encountered an error: " << cpp_strerror(r) << dendl; - return true; - } - - ldout(cct, 20) << m_oid << " C_VerifyObject::should_complete: " << " r=" - << r << dendl; - return update_object_map(get_object_state()); - } - - void send_list_snaps() { - assert(m_image_ctx.owner_lock.is_locked()); - ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::send_list_snaps" - << dendl; - - librados::AioCompletion *comp = librados::Rados::aio_create_completion( - this, NULL, rados_ctx_cb); - - librados::ObjectReadOperation op; - op.list_snaps(&m_snap_set, &m_snap_list_ret); - - int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL); - assert(r == 0); - comp->release(); - } - - uint8_t get_object_state() { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - for (std::vector::const_iterator r = - m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) { - librados::snap_t from_snap_id; - librados::snap_t to_snap_id; - if (r->cloneid == librados::SNAP_HEAD) { - from_snap_id = next_valid_snap_id(m_snap_set.seq + 1); - to_snap_id = librados::SNAP_HEAD; - } else { - from_snap_id = next_valid_snap_id(r->snaps[0]); - to_snap_id = r->snaps[r->snaps.size()-1]; - } - - if (to_snap_id < m_snap_id) { - continue; - } else if (m_snap_id < 
from_snap_id) { - break; - } - - if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 && - from_snap_id != m_snap_id) { - return OBJECT_EXISTS_CLEAN; - } - return OBJECT_EXISTS; - } - return OBJECT_NONEXISTENT; - } - - uint64_t next_valid_snap_id(uint64_t snap_id) { - assert(m_image_ctx.snap_lock.is_locked()); - - std::map::iterator it = - m_image_ctx.snap_info.lower_bound(snap_id); - if (it == m_image_ctx.snap_info.end()) { - return CEPH_NOSNAP; - } - return it->first; - } - - bool update_object_map(uint8_t new_state) { - RWLock::RLocker owner_locker(m_image_ctx.owner_lock); - CephContext *cct = m_image_ctx.cct; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - RWLock::WLocker l(m_image_ctx.object_map_lock); - uint8_t state = m_image_ctx.object_map[m_object_no]; - if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT && - m_snap_id == CEPH_NOSNAP) { - // might be writing object to OSD concurrently - new_state = state; - } - - if (new_state != state) { - ldout(cct, 15) << m_oid << " C_VerifyObject::update_object_map " - << static_cast(state) << "->" - << static_cast(new_state) << dendl; - m_image_ctx.object_map[m_object_no] = new_state; - } - return true; - } -}; - -} // anonymous namespace - - -void RebuildObjectMapRequest::send() { - send_resize_object_map(); -} - -bool RebuildObjectMapRequest::should_complete(int r) { - CephContext *cct = m_image_ctx.cct; - ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; - - RWLock::RLocker owner_lock(m_image_ctx.owner_lock); - switch (m_state) { - case STATE_RESIZE_OBJECT_MAP: - ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl; - if (r == -ESTALE && !m_attempted_trim) { - // objects are still flagged as in-use -- delete them - m_attempted_trim = true; - send_trim_image(); - return false; - } else if (r == 0) { - send_verify_objects(); - } - break; - - case STATE_TRIM_IMAGE: - 
ldout(cct, 5) << "TRIM_IMAGE" << dendl; - if (r == 0) { - send_resize_object_map(); - } - break; - - case STATE_VERIFY_OBJECTS: - ldout(cct, 5) << "VERIFY_OBJECTS" << dendl; - if (r == 0) { - send_save_object_map(); - } - break; - - case STATE_SAVE_OBJECT_MAP: - ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl; - if (r == 0) { - send_update_header(); - } - break; - case STATE_UPDATE_HEADER: - ldout(cct, 5) << "UPDATE_HEADER" << dendl; - if (r == 0) { - return true; - } - break; - - default: - assert(false); - break; - } - - if (r < 0) { - lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r) - << dendl; - return true; - } - return false; -} - -void RebuildObjectMapRequest::send_resize_object_map() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - - uint64_t num_objects; - uint64_t size; - { - RWLock::RLocker l(m_image_ctx.snap_lock); - size = get_image_size(); - num_objects = Striper::get_num_objects(m_image_ctx.layout, size); - } - - if (m_image_ctx.object_map.size() == num_objects) { - send_verify_objects(); - return; - } - - ldout(cct, 5) << this << " send_resize_object_map" << dendl; - m_state = STATE_RESIZE_OBJECT_MAP; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - m_image_ctx.object_map.aio_resize(size, OBJECT_NONEXISTENT, - create_callback_context()); -} - -void RebuildObjectMapRequest::send_trim_image() { - CephContext *cct = m_image_ctx.cct; - - RWLock::RLocker l(m_image_ctx.owner_lock); - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - ldout(cct, 5) << this << " send_trim_image" << dendl; - m_state = STATE_TRIM_IMAGE; - - uint64_t new_size; - uint64_t orig_size; - { - RWLock::RLocker l(m_image_ctx.snap_lock); - new_size = get_image_size(); - orig_size = 
m_image_ctx.get_object_size() * - m_image_ctx.object_map.size(); - } - TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(), - orig_size, new_size, m_prog_ctx); - req->send(); -} - -void RebuildObjectMapRequest::send_verify_objects() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - - uint64_t snap_id; - uint64_t num_objects; - { - RWLock::RLocker l(m_image_ctx.snap_lock); - snap_id = m_image_ctx.snap_id; - num_objects = Striper::get_num_objects(m_image_ctx.layout, - m_image_ctx.get_image_size(snap_id)); - } - - if (num_objects == 0) { - send_save_object_map(); - return; - } - - m_state = STATE_VERIFY_OBJECTS; - ldout(cct, 5) << this << " send_verify_objects" << dendl; - - AsyncObjectThrottle<>::ContextFactory context_factory( - boost::lambda::bind(boost::lambda::new_ptr(), - boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2)); - AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( - this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx, - 0, num_objects); - throttle->start_ops(cct->_conf->rbd_concurrent_management_ops); -} - -void RebuildObjectMapRequest::send_save_object_map() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - - ldout(cct, 5) << this << " send_save_object_map" << dendl; - m_state = STATE_SAVE_OBJECT_MAP; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - m_image_ctx.object_map.aio_save(create_callback_context()); -} - -void RebuildObjectMapRequest::send_update_header() { - assert(m_image_ctx.owner_lock.is_locked()); - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl; - m_state = STATE_UPDATE_HEADER; - - 
librados::ObjectWriteOperation op; - if (m_image_ctx.image_watcher->is_lock_supported()) { - m_image_ctx.image_watcher->assert_header_locked(&op); - } - - uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID; - cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags); - - librados::AioCompletion *comp = create_callback_completion(); - int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op); - assert(r == 0); - comp->release(); - - RWLock::WLocker snap_locker(m_image_ctx.snap_lock); - m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false); -} - -uint64_t RebuildObjectMapRequest::get_image_size() const { - assert(m_image_ctx.snap_lock.is_locked()); - if (m_image_ctx.snap_id == CEPH_NOSNAP) { - if (!m_image_ctx.resize_reqs.empty()) { - return m_image_ctx.resize_reqs.front()->get_image_size(); - } else { - return m_image_ctx.size; - } - } - return m_image_ctx.get_image_size(m_image_ctx.snap_id); -} - -} // namespace librbd diff --git a/src/librbd/RebuildObjectMapRequest.h b/src/librbd/RebuildObjectMapRequest.h deleted file mode 100644 index 02a41ef568c..00000000000 --- a/src/librbd/RebuildObjectMapRequest.h +++ /dev/null @@ -1,78 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H -#define CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H - -#include "include/int_types.h" -#include "librbd/AsyncRequest.h" - -namespace librbd { - -class ImageCtx; -class ProgressContext; - -class RebuildObjectMapRequest : public AsyncRequest<> { -public: - - RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish, - ProgressContext &prog_ctx) - : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx), - m_prog_ctx(prog_ctx), m_attempted_trim(false) - { - } - - virtual void send(); - -protected: - virtual bool should_complete(int r); - -private: - /** - * Rebuild object map goes through the following state machine to - * verify 
per-object state: - * - * - * . | . . . . . . . . . . - * . | . . - * . v v . - * . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE - * . | - * . v - * . . . > STATE_VERIFY_OBJECTS - * | - * v - * STATE_SAVE_OBJECT_MAP - * | - * v - * STATE_UPDATE_HEADER - * - * The _RESIZE_OBJECT_MAP state will be skipped if the object map - * is appropriately sized for the image. The _TRIM_IMAGE state will - * only be hit if the resize failed due to an in-use object. - */ - enum State { - STATE_RESIZE_OBJECT_MAP, - STATE_TRIM_IMAGE, - STATE_VERIFY_OBJECTS, - STATE_SAVE_OBJECT_MAP, - STATE_UPDATE_HEADER - }; - - ImageCtx &m_image_ctx; - ProgressContext &m_prog_ctx; - State m_state; - bool m_attempted_trim; - - void send_resize_object_map(); - void send_trim_image(); - void send_verify_objects(); - void send_save_object_map(); - void send_update_header(); - - uint64_t get_image_size() const; - -}; - -} // namespace librbd - -#endif // CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H diff --git a/src/librbd/ResizeRequest.cc b/src/librbd/ResizeRequest.cc deleted file mode 100644 index 186e730f39d..00000000000 --- a/src/librbd/ResizeRequest.cc +++ /dev/null @@ -1,265 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#include "librbd/ResizeRequest.h" -#include "librbd/ImageCtx.h" -#include "librbd/ImageWatcher.h" -#include "librbd/internal.h" -#include "librbd/ObjectMap.h" -#include "librbd/TrimRequest.h" -#include "common/dout.h" -#include "common/errno.h" - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::ResizeRequest: " - -namespace librbd -{ - -ResizeRequest::ResizeRequest(ImageCtx &image_ctx, Context *on_finish, - uint64_t new_size, - ProgressContext &prog_ctx) - : AsyncRequest(image_ctx, on_finish), - m_original_size(0), m_new_size(new_size), - m_prog_ctx(prog_ctx), m_new_parent_overlap(0), - m_xlist_item(this) -{ -} - -ResizeRequest::~ResizeRequest() { - ResizeRequest *next_req = 
NULL; - { - RWLock::WLocker snap_locker(m_image_ctx.snap_lock); - assert(m_xlist_item.remove_myself()); - if (!m_image_ctx.resize_reqs.empty()) { - next_req = m_image_ctx.resize_reqs.front(); - } - } - - if (next_req != NULL) { - RWLock::RLocker owner_locker(m_image_ctx.owner_lock); - next_req->send(); - } -} - -bool ResizeRequest::should_complete(int r) { - CephContext *cct = m_image_ctx.cct; - ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; - - if (r < 0) { - lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl; - return true; - } - if (m_state == STATE_FINISHED) { - ldout(cct, 5) << "FINISHED" << dendl; - return true; - } - - RWLock::RLocker owner_lock(m_image_ctx.owner_lock); - switch (m_state) { - case STATE_FLUSH: - ldout(cct, 5) << "FLUSH" << dendl; - send_invalidate_cache(); - break; - - case STATE_INVALIDATE_CACHE: - ldout(cct, 5) << "INVALIDATE_CACHE" << dendl; - send_trim_image(); - break; - - case STATE_TRIM_IMAGE: - ldout(cct, 5) << "TRIM_IMAGE" << dendl; - send_update_header(); - break; - - case STATE_GROW_OBJECT_MAP: - ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl; - send_update_header(); - break; - - case STATE_UPDATE_HEADER: - ldout(cct, 5) << "UPDATE_HEADER" << dendl; - if (send_shrink_object_map()) { - update_size_and_overlap(); - return true; - } - break; - - case STATE_SHRINK_OBJECT_MAP: - ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl; - update_size_and_overlap(); - return true; - - default: - lderr(cct) << "invalid state: " << m_state << dendl; - assert(false); - break; - } - return false; -} - -void ResizeRequest::send() { - assert(m_image_ctx.owner_lock.is_locked()); - - { - RWLock::WLocker snap_locker(m_image_ctx.snap_lock); - if (!m_xlist_item.is_on_list()) { - m_image_ctx.resize_reqs.push_back(&m_xlist_item); - if (m_image_ctx.resize_reqs.front() != this) { - return; - } - } - - assert(m_image_ctx.resize_reqs.front() == this); - m_original_size = m_image_ctx.size; - compute_parent_overlap(); - } - 
- CephContext *cct = m_image_ctx.cct; - if (is_canceled()) { - complete(-ERESTART); - } else if (m_original_size == m_new_size) { - ldout(cct, 2) << this << " no change in size (" << m_original_size - << " -> " << m_new_size << ")" << dendl; - m_state = STATE_FINISHED; - complete(0); - } else if (m_new_size > m_original_size) { - ldout(cct, 2) << this << " expanding image (" << m_original_size - << " -> " << m_new_size << ")" << dendl; - send_grow_object_map(); - } else { - ldout(cct, 2) << this << " shrinking image (" << m_original_size - << " -> " << m_new_size << ")" << dendl; - send_flush(); - } -} - -void ResizeRequest::send_flush() { - ldout(m_image_ctx.cct, 5) << this << " send_flush: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_FLUSH; - - // with clipping adjusted, ensure that write / copy-on-read operations won't - // (re-)create objects that we just removed. need async callback to ensure - // we don't have cache_lock already held - m_image_ctx.flush_async_operations(create_async_callback_context()); -} - -void ResizeRequest::send_invalidate_cache() { - assert(m_image_ctx.owner_lock.is_locked()); - ldout(m_image_ctx.cct, 5) << this << " send_invalidate_cache: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_INVALIDATE_CACHE; - - // need to invalidate since we're deleting objects, and - // ObjectCacher doesn't track non-existent objects - m_image_ctx.invalidate_cache(create_callback_context()); -} - -void ResizeRequest::send_trim_image() { - assert(m_image_ctx.owner_lock.is_locked()); - ldout(m_image_ctx.cct, 5) << this << " send_trim_image: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_TRIM_IMAGE; - - TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(), - m_original_size, m_new_size, m_prog_ctx); - req->send(); -} - -void ResizeRequest::send_grow_object_map() { - 
assert(m_image_ctx.owner_lock.is_locked()); - if (!m_image_ctx.object_map.enabled()) { - send_update_header(); - return; - } - - ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_GROW_OBJECT_MAP; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT, - create_callback_context()); -} - -bool ResizeRequest::send_shrink_object_map() { - assert(m_image_ctx.owner_lock.is_locked()); - if (!m_image_ctx.object_map.enabled() || m_new_size > m_original_size) { - return true; - } - - ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_SHRINK_OBJECT_MAP; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT, - create_callback_context()); - return false; -} - -void ResizeRequest::send_update_header() { - assert(m_image_ctx.owner_lock.is_locked()); - - ldout(m_image_ctx.cct, 5) << this << " send_update_header: " - << " original_size=" << m_original_size - << " new_size=" << m_new_size << dendl; - m_state = STATE_UPDATE_HEADER; - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - librados::ObjectWriteOperation op; - if (m_image_ctx.old_format) { - // rewrite only the size field of the header - // NOTE: format 1 image headers are not stored in fixed endian format - bufferlist bl; - bl.append(reinterpret_cast(&m_new_size), sizeof(m_new_size)); - op.write(offsetof(rbd_obj_header_ondisk, 
image_size), bl); - } else { - if (m_image_ctx.image_watcher->is_lock_supported()) { - m_image_ctx.image_watcher->assert_header_locked(&op); - } - cls_client::set_size(&op, m_new_size); - } - - librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, - rados_completion, &op); - assert(r == 0); - rados_completion->release(); -} - -void ResizeRequest::compute_parent_overlap() { - RWLock::RLocker l2(m_image_ctx.parent_lock); - if (m_image_ctx.parent == NULL) { - m_new_parent_overlap = 0; - } else { - m_new_parent_overlap = MIN(m_new_size, m_image_ctx.parent_md.overlap); - } -} - -void ResizeRequest::update_size_and_overlap() { - RWLock::WLocker snap_locker(m_image_ctx.snap_lock); - m_image_ctx.size = m_new_size; - - RWLock::WLocker parent_locker(m_image_ctx.parent_lock); - if (m_image_ctx.parent != NULL && m_new_size < m_original_size) { - m_image_ctx.parent_md.overlap = m_new_parent_overlap; - } -} - -} // namespace librbd diff --git a/src/librbd/ResizeRequest.h b/src/librbd/ResizeRequest.h deleted file mode 100644 index 8a0447858b2..00000000000 --- a/src/librbd/ResizeRequest.h +++ /dev/null @@ -1,101 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_RESIZE_REQUEST_H -#define CEPH_LIBRBD_RESIZE_REQUEST_H - -#include "librbd/AsyncRequest.h" -#include "include/xlist.h" - -namespace librbd -{ - -class ImageCtx; -class ProgressContext; - -class ResizeRequest : public AsyncRequest<> -{ -public: - ResizeRequest(ImageCtx &image_ctx, Context *on_finish, uint64_t new_size, - ProgressContext &prog_ctx); - virtual ~ResizeRequest(); - - virtual void send(); - - inline bool shrinking() const { - return m_new_size < m_original_size; - } - - inline uint64_t get_image_size() const { - return m_new_size; - } - -private: - /** - * Resize goes through the following state machine to resize the image - * and update the object 
map: - * - * @verbatim - * - * -------------> STATE_FINISHED -----------------------------\ - * | . (no change) | - * | . | - * | . . . . . . . . . . . . . . . . . . . . . | - * | . | - * | v | - * |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------| - * | (grow) | - * | | - * | | - * \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE | - * (shrink) | | - * | | - * /----------------------/ | - * | | - * v | - * STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . . | - * | . | - * | . | - * v v v - * STATE_SHRINK_OBJECT_MAP ---> - * - * @endverbatim - * - * The _OBJECT_MAP states are skipped if the object map isn't enabled. - * The state machine will immediately transition to _FINISHED if there - * are no objects to trim. - */ - enum State { - STATE_FLUSH, - STATE_INVALIDATE_CACHE, - STATE_TRIM_IMAGE, - STATE_GROW_OBJECT_MAP, - STATE_UPDATE_HEADER, - STATE_SHRINK_OBJECT_MAP, - STATE_FINISHED - }; - - State m_state; - uint64_t m_original_size; - uint64_t m_new_size; - ProgressContext &m_prog_ctx; - uint64_t m_new_parent_overlap; - - xlist::item m_xlist_item; - - virtual bool should_complete(int r); - - void send_flush(); - void send_invalidate_cache(); - void send_trim_image(); - void send_grow_object_map(); - bool send_shrink_object_map(); - void send_update_header(); - - void compute_parent_overlap(); - void update_size_and_overlap(); - -}; - -} // namespace librbd - -#endif // CEPH_LIBRBD_RESIZE_REQUEST_H diff --git a/src/librbd/TrimRequest.cc b/src/librbd/TrimRequest.cc deleted file mode 100644 index afef4d9da33..00000000000 --- a/src/librbd/TrimRequest.cc +++ /dev/null @@ -1,356 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#include "librbd/TrimRequest.h" -#include "librbd/AsyncObjectThrottle.h" -#include "librbd/AioObjectRequest.h" -#include "librbd/ImageCtx.h" -#include "librbd/ImageWatcher.h" -#include "librbd/internal.h" -#include "librbd/ObjectMap.h" -#include 
"common/ContextCompletion.h" -#include "common/dout.h" -#include "common/errno.h" -#include "osdc/Striper.h" - -#include -#include -#include -#include - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::TrimRequest: " - -namespace librbd -{ - -class C_CopyupObject : public C_AsyncObjectThrottle<> { -public: - C_CopyupObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, - ::SnapContext snapc, uint64_t object_no) - : C_AsyncObjectThrottle(throttle, *image_ctx), m_snapc(snapc), - m_object_no(object_no) - { - } - - virtual int send() { - assert(m_image_ctx.owner_lock.is_locked()); - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - string oid = m_image_ctx.get_object_name(m_object_no); - ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl; - - AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no, - m_snapc, this); - req->send(); - return 0; - } -private: - ::SnapContext m_snapc; - uint64_t m_object_no; -}; - -class C_RemoveObject : public C_AsyncObjectThrottle<> { -public: - C_RemoveObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, - uint64_t object_no) - : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no) - { - } - - virtual int send() { - assert(m_image_ctx.owner_lock.is_locked()); - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - if (!m_image_ctx.object_map.object_may_exist(m_object_no)) { - return 1; - } - - string oid = m_image_ctx.get_object_name(m_object_no); - ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl; - - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb); - int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion); - assert(r == 0); - rados_completion->release(); - return 0; - } - -private: - uint64_t m_object_no; -}; - 
-TrimRequest::TrimRequest(ImageCtx &image_ctx, Context *on_finish, - uint64_t original_size, uint64_t new_size, - ProgressContext &prog_ctx) - : AsyncRequest(image_ctx, on_finish), m_new_size(new_size), - m_prog_ctx(prog_ctx) -{ - uint64_t period = m_image_ctx.get_stripe_period(); - uint64_t new_num_periods = ((m_new_size + period - 1) / period); - m_delete_off = MIN(new_num_periods * period, original_size); - // first object we can delete free and clear - m_delete_start = new_num_periods * m_image_ctx.get_stripe_count(); - m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size); - - CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << this << " trim image " << original_size << " -> " - << m_new_size << " periods " << new_num_periods - << " discard to offset " << m_delete_off - << " delete objects " << m_delete_start - << " to " << m_num_objects << dendl; -} - - -bool TrimRequest::should_complete(int r) -{ - CephContext *cct = m_image_ctx.cct; - ldout(cct, 5) << this << " should_complete: r=" << r << dendl; - if (r < 0) { - lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl; - return true; - } - - RWLock::RLocker owner_lock(m_image_ctx.owner_lock); - switch (m_state) { - case STATE_COPYUP_OBJECTS: - ldout(cct, 5) << " COPYUP_OBJECTS" << dendl; - send_pre_remove(); - break; - - case STATE_PRE_REMOVE: - ldout(cct, 5) << " PRE_REMOVE" << dendl; - send_remove_objects(); - break; - - case STATE_REMOVE_OBJECTS: - ldout(cct, 5) << " REMOVE_OBJECTS" << dendl; - send_post_remove(); - break; - - case STATE_POST_REMOVE: - ldout(cct, 5) << " POST_OBJECTS" << dendl; - send_clean_boundary(); - break; - - case STATE_CLEAN_BOUNDARY: - ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl; - finish(0); - break; - - case STATE_FINISHED: - ldout(cct, 5) << "FINISHED" << dendl; - return true; - - default: - lderr(cct) << "invalid state: " << m_state << dendl; - assert(false); - break; - } - return false; -} - -void TrimRequest::send() { - 
send_copyup_objects(); -} - -void TrimRequest::send_copyup_objects() { - assert(m_image_ctx.owner_lock.is_locked()); - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - - if (m_delete_start >= m_num_objects) { - send_clean_boundary(); - return; - } - - ::SnapContext snapc; - bool has_snapshots; - uint64_t parent_overlap; - { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - RWLock::RLocker parent_locker(m_image_ctx.parent_lock); - - snapc = m_image_ctx.snapc; - has_snapshots = !m_image_ctx.snaps.empty(); - int r = m_image_ctx.get_parent_overlap(m_image_ctx.get_copyup_snap_id(), - &parent_overlap); - assert(r == 0); - } - - // copyup is only required for portion of image that overlaps parent - uint64_t copyup_end = Striper::get_num_objects(m_image_ctx.layout, - parent_overlap); - // TODO: protect against concurrent shrink and snap create? - if (copyup_end <= m_delete_start || !has_snapshots) { - send_pre_remove(); - return; - } - - uint64_t copyup_start = m_delete_start; - m_delete_start = copyup_end; - - ldout(m_image_ctx.cct, 5) << this << " send_copyup_objects: " - << " start object=" << copyup_start << ", " - << " end object=" << copyup_end << dendl; - m_state = STATE_COPYUP_OBJECTS; - - Context *ctx = create_callback_context(); - AsyncObjectThrottle<>::ContextFactory context_factory( - boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject>(), - boost::lambda::_1, &m_image_ctx, snapc, boost::lambda::_2)); - AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( - this, m_image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start, - copyup_end); - throttle->start_ops(m_image_ctx.concurrent_management_ops); -} - -void TrimRequest::send_remove_objects() { - assert(m_image_ctx.owner_lock.is_locked()); - - ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: " - << " delete_start=" << m_delete_start - << " num_objects=" << m_num_objects << dendl; - m_state = STATE_REMOVE_OBJECTS; - - Context *ctx = 
create_callback_context(); - AsyncObjectThrottle<>::ContextFactory context_factory( - boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject>(), - boost::lambda::_1, &m_image_ctx, boost::lambda::_2)); - AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( - this, m_image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start, - m_num_objects); - throttle->start_ops(m_image_ctx.concurrent_management_ops); -} - -void TrimRequest::send_pre_remove() { - assert(m_image_ctx.owner_lock.is_locked()); - if (m_delete_start >= m_num_objects) { - send_clean_boundary(); - return; - } - - bool remove_objects = false; - { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - if (!m_image_ctx.object_map.enabled()) { - remove_objects = true; - } else { - ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: " - << " delete_start=" << m_delete_start - << " num_objects=" << m_num_objects << dendl; - m_state = STATE_PRE_REMOVE; - - assert(m_image_ctx.image_watcher->is_lock_owner()); - - // flag the objects as pending deletion - Context *ctx = create_callback_context(); - RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock); - if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects, - OBJECT_PENDING, OBJECT_EXISTS, - ctx)) { - delete ctx; - remove_objects = true; - } - } - } - - // avoid possible recursive lock attempts - if (remove_objects) { - // no object map update required - send_remove_objects(); - } -} - -void TrimRequest::send_post_remove() { - assert(m_image_ctx.owner_lock.is_locked()); - - bool clean_boundary = false; - { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - if (!m_image_ctx.object_map.enabled()) { - clean_boundary = true; - } else { - ldout(m_image_ctx.cct, 5) << this << " send_post_remove: " - << " delete_start=" << m_delete_start - << " num_objects=" << m_num_objects << dendl; - m_state = STATE_POST_REMOVE; - - assert(m_image_ctx.image_watcher->is_lock_owner()); - - // flag the pending objects as removed - Context *ctx = 
create_callback_context(); - RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock); - if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects, - OBJECT_NONEXISTENT, - OBJECT_PENDING, ctx)) { - delete ctx; - clean_boundary = true; - } - } - } - - // avoid possible recursive lock attempts - if (clean_boundary) { - // no object map update required - send_clean_boundary(); - } -} - -void TrimRequest::send_clean_boundary() { - assert(m_image_ctx.owner_lock.is_locked()); - CephContext *cct = m_image_ctx.cct; - if (m_delete_off <= m_new_size) { - finish(0); - return; - } - - // should have been canceled prior to releasing lock - assert(!m_image_ctx.image_watcher->is_lock_supported() || - m_image_ctx.image_watcher->is_lock_owner()); - uint64_t delete_len = m_delete_off - m_new_size; - ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: " - << " delete_off=" << m_delete_off - << " length=" << delete_len << dendl; - m_state = STATE_CLEAN_BOUNDARY; - - ::SnapContext snapc; - { - RWLock::RLocker snap_locker(m_image_ctx.snap_lock); - snapc = m_image_ctx.snapc; - } - - // discard the weird boundary - std::vector<ObjectExtent> extents; - Striper::file_to_extents(cct, m_image_ctx.format_string, - &m_image_ctx.layout, m_new_size, delete_len, 0, - extents); - - ContextCompletion *completion = - new ContextCompletion(create_callback_context(), true); - for (vector<ObjectExtent>::iterator p = extents.begin(); - p != extents.end(); ++p) { - ldout(cct, 20) << " ex " << *p << dendl; - Context *req_comp = new C_ContextCompletion(*completion); - - AioObjectRequest *req; - if (p->offset == 0) { - req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc, - req_comp); - } else { - req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno, - p->offset, snapc, req_comp); - } - req->send(); - } - completion->finish_adding_requests(); -} - -void TrimRequest::finish(int r) { - m_state = STATE_FINISHED; - async_complete(r); -} - -} // namespace librbd diff --git
a/src/librbd/TrimRequest.h b/src/librbd/TrimRequest.h deleted file mode 100644 index 223c6005e3b..00000000000 --- a/src/librbd/TrimRequest.h +++ /dev/null @@ -1,90 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#ifndef CEPH_LIBRBD_TRIM_REQUEST_H -#define CEPH_LIBRBD_TRIM_REQUEST_H - -#include "librbd/AsyncRequest.h" - -namespace librbd -{ - -class ImageCtx; -class ProgressContext; - -class TrimRequest : public AsyncRequest<> -{ -public: - TrimRequest(ImageCtx &image_ctx, Context *on_finish, - uint64_t original_size, uint64_t new_size, - ProgressContext &prog_ctx); - - virtual void send(); - -protected: - /** - * Trim goes through the following state machine to remove whole objects, - * clean partially trimmed objects, and update the object map: - * - * @verbatim - * - * <start> . . . . > STATE_FINISHED . . . . . . . . . - * | . . - * | . . . . . . . . . . . . . - * | . . - * v . . - * STATE_COPYUP_OBJECTS . . . . . - * | . . . - * | . . . - * v v v . - * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS . - * | . . . - * /-----------------------/ . . . . . . . . - * | . . . - * v v v v - * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish> - * . ^ - * . . - * . . . . . . . . . . . . . . . . . . . . . . . - * - * @endverbatim - * - * The _COPYUP_OBJECTS state is skipped if there is no parent overlap - * within the new image size and the image does not have any snapshots. - * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map - * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects - * are removed. The _CLEAN_BOUNDARY state is skipped if no boundary - * objects are cleaned. The state machine will immediately transition - * to _FINISHED state if there are no bytes to trim. 
- */ - - enum State { - STATE_COPYUP_OBJECTS, - STATE_PRE_REMOVE, - STATE_REMOVE_OBJECTS, - STATE_POST_REMOVE, - STATE_CLEAN_BOUNDARY, - STATE_FINISHED - }; - - virtual bool should_complete(int r); - - State m_state; - -private: - uint64_t m_delete_start; - uint64_t m_num_objects; - uint64_t m_delete_off; - uint64_t m_new_size; - ProgressContext &m_prog_ctx; - - void send_copyup_objects(); - void send_remove_objects(); - void send_pre_remove(); - void send_post_remove(); - void send_clean_boundary(); - void finish(int r); -}; - -} // namespace librbd - -#endif // CEPH_LIBRBD_TRIM_REQUEST_H diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 704252bba2b..2cd85bdc36f 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -26,13 +26,13 @@ #include "librbd/ImageCtx.h" #include "librbd/ImageWatcher.h" #include "librbd/internal.h" -#include "librbd/FlattenRequest.h" #include "librbd/Journal.h" #include "librbd/ObjectMap.h" #include "librbd/parent_types.h" -#include "librbd/RebuildObjectMapRequest.h" -#include "librbd/ResizeRequest.h" -#include "librbd/TrimRequest.h" +#include "librbd/operation/FlattenRequest.h" +#include "librbd/operation/RebuildObjectMapRequest.h" +#include "librbd/operation/ResizeRequest.h" +#include "librbd/operation/TrimRequest.h" #include "include/util.h" #include @@ -326,8 +326,8 @@ int invoke_async_request(ImageCtx *ictx, const std::string& request_type, C_SaferCond ctx; ictx->snap_lock.get_read(); - TrimRequest *req = new TrimRequest(*ictx, &ctx, ictx->size, newsize, - prog_ctx); + operation::TrimRequest *req = new operation::TrimRequest( + *ictx, &ctx, ictx->size, newsize, prog_ctx); ictx->snap_lock.put_read(); req->send(); @@ -2335,7 +2335,8 @@ reprotect_and_return_err: ProgressContext& prog_ctx) { assert(ictx->owner_lock.is_locked()); - ResizeRequest *req = new ResizeRequest(*ictx, ctx, new_size, prog_ctx); + operation::ResizeRequest *req = new operation::ResizeRequest( + *ictx, ctx, new_size, prog_ctx); 
req->send(); } @@ -3338,8 +3339,8 @@ reprotect_and_return_err: overlap_objects = Striper::get_num_objects(ictx->layout, overlap); } - FlattenRequest *req = new FlattenRequest(*ictx, ctx, object_size, - overlap_objects, snapc, prog_ctx); + operation::FlattenRequest *req = new operation::FlattenRequest( + *ictx, ctx, object_size, overlap_objects, snapc, prog_ctx); req->send(); return 0; } @@ -3389,8 +3390,8 @@ reprotect_and_return_err: return r; } - RebuildObjectMapRequest *req = new RebuildObjectMapRequest(*ictx, ctx, - prog_ctx); + operation::RebuildObjectMapRequest *req = + new operation::RebuildObjectMapRequest(*ictx, ctx, prog_ctx); req->send(); return 0; } diff --git a/src/librbd/operation/FlattenRequest.cc b/src/librbd/operation/FlattenRequest.cc new file mode 100644 index 00000000000..e3d91812a13 --- /dev/null +++ b/src/librbd/operation/FlattenRequest.cc @@ -0,0 +1,178 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/FlattenRequest.h" +#include "librbd/AioObjectRequest.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageWatcher.h" +#include "librbd/ObjectMap.h" +#include "common/dout.h" +#include "common/errno.h" +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::FlattenRequest: " + +namespace librbd { +namespace operation { + +class C_FlattenObject : public C_AsyncObjectThrottle<> { +public: + C_FlattenObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, + uint64_t object_size, ::SnapContext snapc, uint64_t object_no) + : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_size(object_size), + m_snapc(snapc), m_object_no(object_no) + { + } + + virtual int send() { + assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + if (m_image_ctx.image_watcher->is_lock_supported() && + !m_image_ctx.image_watcher->is_lock_owner()) { + 
ldout(cct, 1) << "lost exclusive lock during flatten" << dendl; + return -ERESTART; + } + + bufferlist bl; + string oid = m_image_ctx.get_object_name(m_object_no); + AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, oid, m_object_no, 0, + bl, m_snapc, this); + if (!req->has_parent()) { + // stop early if the parent went away - it just means + // another flatten finished first or the image was resized + delete req; + return 1; + } + + req->send(); + return 0; + } + +private: + uint64_t m_object_size; + ::SnapContext m_snapc; + uint64_t m_object_no; +}; + +bool FlattenRequest::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; + if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) { + lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl; + return true; + } + + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); + switch (m_state) { + case STATE_FLATTEN_OBJECTS: + ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl; + return send_update_header(); + + case STATE_UPDATE_HEADER: + ldout(cct, 5) << "UPDATE_HEADER" << dendl; + return send_update_children(); + + case STATE_UPDATE_CHILDREN: + ldout(cct, 5) << "UPDATE_CHILDREN" << dendl; + return true; + + default: + lderr(cct) << "invalid state: " << m_state << dendl; + assert(false); + break; + } + return false; +} + +void FlattenRequest::send() { + assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " send" << dendl; + + m_state = STATE_FLATTEN_OBJECTS; + AsyncObjectThrottle<>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject>(), + boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc, + boost::lambda::_2)); + AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( + this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx, + 0, m_overlap_objects); + throttle->start_ops(m_image_ctx.concurrent_management_ops); +} + 
+bool FlattenRequest::send_update_header() { + assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + ldout(cct, 5) << this << " send_update_header" << dendl; + m_state = STATE_UPDATE_HEADER; + + // should have been canceled prior to releasing lock + assert(!m_image_ctx.image_watcher->is_lock_supported() || + m_image_ctx.image_watcher->is_lock_owner()); + + { + RWLock::RLocker parent_locker(m_image_ctx.parent_lock); + // stop early if the parent went away - it just means + // another flatten finished first, so this one is useless. + if (!m_image_ctx.parent) { + ldout(cct, 5) << "image already flattened" << dendl; + return true; + } + m_parent_spec = m_image_ctx.parent_md.spec; + } + m_ignore_enoent = true; + + // remove parent from this (base) image + librados::ObjectWriteOperation op; + if (m_image_ctx.image_watcher->is_lock_supported()) { + m_image_ctx.image_watcher->assert_header_locked(&op); + } + cls_client::remove_parent(&op); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, + rados_completion, &op); + assert(r == 0); + rados_completion->release(); + return false; +} + +bool FlattenRequest::send_update_children() { + assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + // should have been canceled prior to releasing lock + assert(!m_image_ctx.image_watcher->is_lock_supported() || + m_image_ctx.image_watcher->is_lock_owner()); + + // if there are no snaps, remove from the children object as well + // (if snapshots remain, they have their own parent info, and the child + // will be removed when the last snap goes away) + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + if ((m_image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 && + !m_image_ctx.snaps.empty()) { + return true; + } + + ldout(cct, 2) << "removing child from children list..." 
<< dendl; + m_state = STATE_UPDATE_CHILDREN; + + librados::ObjectWriteOperation op; + cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id); + + librados::AioCompletion *rados_completion = create_callback_completion(); + int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion, + &op); + assert(r == 0); + rados_completion->release(); + return false; +} + +} // namespace operation +} // namespace librbd diff --git a/src/librbd/operation/FlattenRequest.h b/src/librbd/operation/FlattenRequest.h new file mode 100644 index 00000000000..8474f9c3dfc --- /dev/null +++ b/src/librbd/operation/FlattenRequest.h @@ -0,0 +1,84 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H +#define CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H + +#include "librbd/AsyncRequest.h" +#include "librbd/parent_types.h" +#include "common/snap_types.h" + +namespace librbd { + +class ImageCtx; +class ProgressContext; + +namespace operation { + +class FlattenRequest : public AsyncRequest<> +{ +public: + FlattenRequest(ImageCtx &image_ctx, Context *on_finish, + uint64_t object_size, uint64_t overlap_objects, + const ::SnapContext &snapc, ProgressContext &prog_ctx) + : AsyncRequest(image_ctx, on_finish), m_object_size(object_size), + m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx), + m_ignore_enoent(false) + { + } + + virtual void send(); + +protected: + virtual bool should_complete(int r); + +private: + /** + * Flatten goes through the following state machine to copyup objects + * from the parent image: + * + * @verbatim + * + * <start> + * | + * v + * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . . + * . | . + * . | . + * . v . + * . STATE_UPDATE_CHILDREN . + * . | . + * . | . + * . \---> <finish> < . . + * . ^ + * . . + * . . . . . . . . . . . . . . . . . . . 
+ * + * @endverbatim + * + * The _UPDATE_CHILDREN state will be skipped if the image has one or + * more snapshots. The _UPDATE_HEADER state will be skipped if the + * image was concurrently flattened by another client. + */ + enum State { + STATE_FLATTEN_OBJECTS, + STATE_UPDATE_HEADER, + STATE_UPDATE_CHILDREN + }; + + uint64_t m_object_size; + uint64_t m_overlap_objects; + ::SnapContext m_snapc; + ProgressContext &m_prog_ctx; + State m_state; + + parent_spec m_parent_spec; + bool m_ignore_enoent; + + bool send_update_header(); + bool send_update_children(); +}; + +} // namespace operation +} // namespace librbd + +#endif // CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H diff --git a/src/librbd/operation/RebuildObjectMapRequest.cc b/src/librbd/operation/RebuildObjectMapRequest.cc new file mode 100644 index 00000000000..803083c848c --- /dev/null +++ b/src/librbd/operation/RebuildObjectMapRequest.cc @@ -0,0 +1,357 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/operation/RebuildObjectMapRequest.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageWatcher.h" +#include "librbd/internal.h" +#include "librbd/ObjectMap.h" +#include "librbd/operation/ResizeRequest.h" +#include "librbd/operation/TrimRequest.h" +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: " + +namespace librbd { +namespace operation { + +namespace { + +class C_VerifyObject : public C_AsyncObjectThrottle<> { +public: + C_VerifyObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, + uint64_t snap_id, uint64_t object_no) + : C_AsyncObjectThrottle(throttle, *image_ctx), m_snap_id(snap_id), + m_object_no(object_no), m_oid(m_image_ctx.get_object_name(m_object_no)) + { + m_io_ctx.dup(m_image_ctx.md_ctx); + m_io_ctx.snap_set_read(CEPH_SNAPDIR); + } + 
+ virtual void complete(int r) { + if (should_complete(r)) { + ldout(m_image_ctx.cct, 20) << m_oid << " C_VerifyObject completed " + << dendl; + finish(r); + delete this; + } + } + + virtual int send() { + send_list_snaps(); + return 0; + } + +private: + librados::IoCtx m_io_ctx; + uint64_t m_snap_id; + uint64_t m_object_no; + std::string m_oid; + + librados::snap_set_t m_snap_set; + int m_snap_list_ret; + + bool should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + if (r == 0) { + r = m_snap_list_ret; + } + if (r < 0 && r != -ENOENT) { + lderr(cct) << m_oid << " C_VerifyObject::should_complete: " + << "encountered an error: " << cpp_strerror(r) << dendl; + return true; + } + + ldout(cct, 20) << m_oid << " C_VerifyObject::should_complete: " << " r=" + << r << dendl; + return update_object_map(get_object_state()); + } + + void send_list_snaps() { + assert(m_image_ctx.owner_lock.is_locked()); + ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::send_list_snaps" + << dendl; + + librados::AioCompletion *comp = librados::Rados::aio_create_completion( + this, NULL, rados_ctx_cb); + + librados::ObjectReadOperation op; + op.list_snaps(&m_snap_set, &m_snap_list_ret); + + int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL); + assert(r == 0); + comp->release(); + } + + uint8_t get_object_state() { + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + for (std::vector::const_iterator r = + m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) { + librados::snap_t from_snap_id; + librados::snap_t to_snap_id; + if (r->cloneid == librados::SNAP_HEAD) { + from_snap_id = next_valid_snap_id(m_snap_set.seq + 1); + to_snap_id = librados::SNAP_HEAD; + } else { + from_snap_id = next_valid_snap_id(r->snaps[0]); + to_snap_id = r->snaps[r->snaps.size()-1]; + } + + if (to_snap_id < m_snap_id) { + continue; + } else if (m_snap_id < from_snap_id) { + break; + } + + if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 && + from_snap_id != m_snap_id) { + return 
OBJECT_EXISTS_CLEAN; + } + return OBJECT_EXISTS; + } + return OBJECT_NONEXISTENT; + } + + uint64_t next_valid_snap_id(uint64_t snap_id) { + assert(m_image_ctx.snap_lock.is_locked()); + + std::map::iterator it = + m_image_ctx.snap_info.lower_bound(snap_id); + if (it == m_image_ctx.snap_info.end()) { + return CEPH_NOSNAP; + } + return it->first; + } + + bool update_object_map(uint8_t new_state) { + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); + CephContext *cct = m_image_ctx.cct; + + // should have been canceled prior to releasing lock + assert(!m_image_ctx.image_watcher->is_lock_supported() || + m_image_ctx.image_watcher->is_lock_owner()); + + RWLock::WLocker l(m_image_ctx.object_map_lock); + uint8_t state = m_image_ctx.object_map[m_object_no]; + if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT && + m_snap_id == CEPH_NOSNAP) { + // might be writing object to OSD concurrently + new_state = state; + } + + if (new_state != state) { + ldout(cct, 15) << m_oid << " C_VerifyObject::update_object_map " + << static_cast(state) << "->" + << static_cast(new_state) << dendl; + m_image_ctx.object_map[m_object_no] = new_state; + } + return true; + } +}; + +} // anonymous namespace + + +void RebuildObjectMapRequest::send() { + send_resize_object_map(); +} + +bool RebuildObjectMapRequest::should_complete(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl; + + RWLock::RLocker owner_lock(m_image_ctx.owner_lock); + switch (m_state) { + case STATE_RESIZE_OBJECT_MAP: + ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl; + if (r == -ESTALE && !m_attempted_trim) { + // objects are still flagged as in-use -- delete them + m_attempted_trim = true; + send_trim_image(); + return false; + } else if (r == 0) { + send_verify_objects(); + } + break; + + case STATE_TRIM_IMAGE: + ldout(cct, 5) << "TRIM_IMAGE" << dendl; + if (r == 0) { + send_resize_object_map(); + } + break; + + case STATE_VERIFY_OBJECTS: + 
ldout(cct, 5) << "VERIFY_OBJECTS" << dendl; + if (r == 0) { + send_save_object_map(); + } + break; + + case STATE_SAVE_OBJECT_MAP: + ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl; + if (r == 0) { + send_update_header(); + } + break; + case STATE_UPDATE_HEADER: + ldout(cct, 5) << "UPDATE_HEADER" << dendl; + if (r == 0) { + return true; + } + break; + + default: + assert(false); + break; + } + + if (r < 0) { + lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r) + << dendl; + return true; + } + return false; +} + +void RebuildObjectMapRequest::send_resize_object_map() { + assert(m_image_ctx.owner_lock.is_locked()); + CephContext *cct = m_image_ctx.cct; + + uint64_t num_objects; + uint64_t size; + { + RWLock::RLocker l(m_image_ctx.snap_lock); + size = get_image_size(); + num_objects = Striper::get_num_objects(m_image_ctx.layout, size); + } + + if (m_image_ctx.object_map.size() == num_objects) { + send_verify_objects(); + return; + } + + ldout(cct, 5) << this << " send_resize_object_map" << dendl; + m_state = STATE_RESIZE_OBJECT_MAP; + + // should have been canceled prior to releasing lock + assert(!m_image_ctx.image_watcher->is_lock_supported() || + m_image_ctx.image_watcher->is_lock_owner()); + m_image_ctx.object_map.aio_resize(size, OBJECT_NONEXISTENT, + create_callback_context()); +} + +void RebuildObjectMapRequest::send_trim_image() { + CephContext *cct = m_image_ctx.cct; + + RWLock::RLocker l(m_image_ctx.owner_lock); + + // should have been canceled prior to releasing lock + assert(!m_image_ctx.image_watcher->is_lock_supported() || + m_image_ctx.image_watcher->is_lock_owner()); + ldout(cct, 5) << this << " send_trim_image" << dendl; + m_state = STATE_TRIM_IMAGE; + + uint64_t new_size; + uint64_t orig_size; + { + RWLock::RLocker l(m_image_ctx.snap_lock); + new_size = get_image_size(); + orig_size = m_image_ctx.get_object_size() * + m_image_ctx.object_map.size(); + } + TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(), 
+                                     orig_size, new_size, m_prog_ctx);
+  req->send();
+}
+
+void RebuildObjectMapRequest::send_verify_objects() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  uint64_t snap_id;
+  uint64_t num_objects;
+  {
+    RWLock::RLocker l(m_image_ctx.snap_lock);
+    snap_id = m_image_ctx.snap_id;
+    num_objects = Striper::get_num_objects(m_image_ctx.layout,
+                                           m_image_ctx.get_image_size(snap_id));
+  }
+
+  if (num_objects == 0) {
+    send_save_object_map();
+    return;
+  }
+
+  m_state = STATE_VERIFY_OBJECTS;
+  ldout(cct, 5) << this << " send_verify_objects" << dendl;
+
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObject>(), // NOTE(review): template argument was lost in extraction; C_VerifyObject is assumed to be defined earlier in this file - confirm
+      boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
+    0, num_objects);
+  throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+void RebuildObjectMapRequest::send_save_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  ldout(cct, 5) << this << " send_save_object_map" << dendl;
+  m_state = STATE_SAVE_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+  m_image_ctx.object_map.aio_save(create_callback_context());
+}
+
+void RebuildObjectMapRequest::send_update_header() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl;
+  m_state = STATE_UPDATE_HEADER;
+
+  librados::ObjectWriteOperation op;
+  if (m_image_ctx.image_watcher->is_lock_supported()) {
+    m_image_ctx.image_watcher->assert_header_locked(&op);
+  }
+
+  uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID;
+  cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags);
+
+  librados::AioCompletion *comp = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
+  assert(r == 0);
+  comp->release();
+
+  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+  m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false);
+}
+
+uint64_t RebuildObjectMapRequest::get_image_size() const {
+  assert(m_image_ctx.snap_lock.is_locked());
+  if (m_image_ctx.snap_id == CEPH_NOSNAP) {
+    if (!m_image_ctx.resize_reqs.empty()) {
+      return m_image_ctx.resize_reqs.front()->get_image_size();
+    } else {
+      return m_image_ctx.size;
+    }
+  }
+  return m_image_ctx.get_image_size(m_image_ctx.snap_id);
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/RebuildObjectMapRequest.h b/src/librbd/operation/RebuildObjectMapRequest.h
new file mode 100644
index 00000000000..3ed49ba5d7f
--- /dev/null
+++ b/src/librbd/operation/RebuildObjectMapRequest.h
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/AsyncRequest.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+/**
+ * Asynchronous request that rebuilds the object map of an image; the
+ * states it steps through are documented on the State enum below.
+ */
+class RebuildObjectMapRequest : public AsyncRequest<> {
+public:
+
+  RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish,
+                          ProgressContext &prog_ctx)
+    : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx),
+      m_prog_ctx(prog_ctx), m_attempted_trim(false)
+  {
+  }
+
+  virtual void send();
+
+protected:
+  virtual bool should_complete(int r);
+
+private:
+  /**
+   * Rebuild object map goes through the following state machine to
+   * verify per-object state:
+   *
+   * <start>
+   *  .   |               . . . . . . . . . .
+   *  .   |               .                 .
+   *  .   v               v                 .
+   *  . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE
+   *  .         |
+   *  .         v
+   *  . . . > STATE_VERIFY_OBJECTS
+   *            |
+   *            v
+   *          STATE_SAVE_OBJECT_MAP
+   *            |
+   *            v
+   *          STATE_UPDATE_HEADER
+   *
+   * The _RESIZE_OBJECT_MAP state will be skipped if the object map
+   * is appropriately sized for the image. The _TRIM_IMAGE state will
+   * only be hit if the resize failed due to an in-use object.
+   */
+  enum State {
+    STATE_RESIZE_OBJECT_MAP,
+    STATE_TRIM_IMAGE,
+    STATE_VERIFY_OBJECTS,
+    STATE_SAVE_OBJECT_MAP,
+    STATE_UPDATE_HEADER
+  };
+
+  ImageCtx &m_image_ctx;
+  ProgressContext &m_prog_ctx;
+  State m_state;
+  bool m_attempted_trim;
+
+  void send_resize_object_map();
+  void send_trim_image();
+  // throttled pass over every object of the image; goes straight to
+  // send_save_object_map() when the image has no objects
+  void send_verify_objects();
+  // asynchronously save the object map
+  void send_save_object_map();
+  // submit the object map / fast diff "invalid" flag update to the header
+  void send_update_header();
+
+  // image size at m_image_ctx.snap_id; for CEPH_NOSNAP the target size of
+  // the first queued resize request is preferred over the current size.
+  // requires snap_lock to be held
+  uint64_t get_image_size() const;
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc
new file mode 100644
index 00000000000..03247431f43
--- /dev/null
+++ b/src/librbd/operation/ResizeRequest.cc
@@ -0,0 +1,267 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/TrimRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ResizeRequest: "
+
+namespace librbd {
+namespace operation {
+
+ResizeRequest::ResizeRequest(ImageCtx &image_ctx, Context *on_finish,
+                             uint64_t new_size,
+                             ProgressContext &prog_ctx)
+  : AsyncRequest(image_ctx, on_finish),
+    m_original_size(0), m_new_size(new_size),
+    m_prog_ctx(prog_ctx), m_new_parent_overlap(0),
+    m_xlist_item(this)
+{
+}
+
+ResizeRequest::~ResizeRequest() {
+  ResizeRequest *next_req = NULL;
+  {
+    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+    assert(m_xlist_item.remove_myself()); // NOTE(review): side effect in assert
+    if (!m_image_ctx.resize_reqs.empty()) {
+      next_req = m_image_ctx.resize_reqs.front();
+    }
+  }
+
+  if (next_req != NULL) {
+    RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+    next_req->send();
+  }
+}
+
+bool ResizeRequest::should_complete(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+  if (m_state == STATE_FINISHED) {
+    ldout(cct, 5) << "FINISHED" << dendl;
+    return true;
+  }
+
+  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+  switch (m_state) {
+  case STATE_FLUSH:
+    ldout(cct, 5) << "FLUSH" << dendl;
+    send_invalidate_cache();
+    break;
+
+  case STATE_INVALIDATE_CACHE:
+    ldout(cct, 5) << "INVALIDATE_CACHE" << dendl;
+    send_trim_image();
+    break;
+
+  case STATE_TRIM_IMAGE:
+    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+    send_update_header();
+    break;
+
+  case STATE_GROW_OBJECT_MAP:
+    ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
+    send_update_header();
+    break;
+
+  case STATE_UPDATE_HEADER:
+    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+    if (send_shrink_object_map()) {
+      update_size_and_overlap();
+      return true;
+    }
+    break;
+
+  case STATE_SHRINK_OBJECT_MAP:
+    ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
+    update_size_and_overlap();
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+void ResizeRequest::send() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  {
+    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+    if (!m_xlist_item.is_on_list()) {
+      m_image_ctx.resize_reqs.push_back(&m_xlist_item);
+      if (m_image_ctx.resize_reqs.front() != this) {
+        return;
+      }
+    }
+
+    assert(m_image_ctx.resize_reqs.front() == this);
+    m_original_size = m_image_ctx.size;
+    compute_parent_overlap();
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  if (is_canceled()) {
+    complete(-ERESTART);
+  } else if (m_original_size == m_new_size) {
+    ldout(cct, 2) << this << " no change in size (" << m_original_size
+                  << " -> " << m_new_size << ")" << dendl;
+    m_state = STATE_FINISHED;
+    complete(0);
+  } else if (m_new_size > m_original_size) {
+    ldout(cct, 2) << this << " expanding image (" << m_original_size
+                  << " -> " << m_new_size << ")" << dendl;
+    send_grow_object_map();
+  } else {
+    ldout(cct, 2) << this << " shrinking image (" << m_original_size
+                  << " -> " << m_new_size << ")" << dendl;
+    send_flush();
+  }
+}
+
+void ResizeRequest::send_flush() {
+  ldout(m_image_ctx.cct, 5) << this << " send_flush: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_FLUSH;
+
+  // with clipping adjusted, ensure that write / copy-on-read operations won't
+  // (re-)create objects that we just removed. need async callback to ensure
+  // we don't have cache_lock already held
+  m_image_ctx.flush_async_operations(create_async_callback_context());
+}
+
+void ResizeRequest::send_invalidate_cache() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  ldout(m_image_ctx.cct, 5) << this << " send_invalidate_cache: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_INVALIDATE_CACHE;
+
+  // need to invalidate since we're deleting objects, and
+  // ObjectCacher doesn't track non-existent objects
+  m_image_ctx.invalidate_cache(create_callback_context());
+}
+
+void ResizeRequest::send_trim_image() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_TRIM_IMAGE;
+
+  TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
+                                     m_original_size, m_new_size, m_prog_ctx);
+  req->send();
+}
+
+void ResizeRequest::send_grow_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  if (!m_image_ctx.object_map.enabled()) {
+    send_update_header();
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_GROW_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
+                                    create_callback_context());
+}
+
+bool ResizeRequest::send_shrink_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  if (!m_image_ctx.object_map.enabled() || m_new_size > m_original_size) {
+    return true;
+  }
+
+  ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_SHRINK_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
+                                    create_callback_context());
+  return false;
+}
+
+void ResizeRequest::send_update_header() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_UPDATE_HEADER;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  librados::ObjectWriteOperation op;
+  if (m_image_ctx.old_format) {
+    // rewrite only the size field of the header
+    // NOTE: format 1 image headers are not stored in fixed endian format
+    bufferlist bl;
+    bl.append(reinterpret_cast<const char*>(&m_new_size), sizeof(m_new_size));
+    op.write(offsetof(rbd_obj_header_ondisk, image_size), bl);
+  } else {
+    if (m_image_ctx.image_watcher->is_lock_supported()) {
+      m_image_ctx.image_watcher->assert_header_locked(&op);
+    }
+    cls_client::set_size(&op, m_new_size);
+  }
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                         rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+void ResizeRequest::compute_parent_overlap() {
+  RWLock::RLocker l2(m_image_ctx.parent_lock);
+  if (m_image_ctx.parent == NULL) {
+    m_new_parent_overlap = 0;
+  } else {
+    m_new_parent_overlap = MIN(m_new_size, m_image_ctx.parent_md.overlap);
+  }
+}
+
+void ResizeRequest::update_size_and_overlap() {
+  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+  m_image_ctx.size = m_new_size;
+
+  RWLock::WLocker parent_locker(m_image_ctx.parent_lock);
+  if (m_image_ctx.parent != NULL && m_new_size < m_original_size) {
+    m_image_ctx.parent_md.overlap = m_new_parent_overlap;
+  }
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/ResizeRequest.h b/src/librbd/operation/ResizeRequest.h
new file mode 100644
index 00000000000..f7b941005fb
--- /dev/null
+++ b/src/librbd/operation/ResizeRequest.h
@@ -0,0 +1,116 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "include/xlist.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+/**
+ * Asynchronous request that resizes an image to new_size.  Requests are
+ * queued on ImageCtx::resize_reqs and only the front request is executed;
+ * the destructor dequeues the request and sends the next queued one.
+ */
+class ResizeRequest : public AsyncRequest<>
+{
+public:
+  ResizeRequest(ImageCtx &image_ctx, Context *on_finish, uint64_t new_size,
+                ProgressContext &prog_ctx);
+  virtual ~ResizeRequest();
+
+  virtual void send();
+
+  // true if the target size is below the image size sampled by send()
+  inline bool shrinking() const {
+    return m_new_size < m_original_size;
+  }
+
+  // target size of this resize
+  inline uint64_t get_image_size() const {
+    return m_new_size;
+  }
+
+private:
+  /**
+   * Resize goes through the following state machine to resize the image
+   * and update the object map:
+   *
+   * @verbatim
+   *
+   * <start> -------------> STATE_FINISHED -----------------------------\
+   *  |  .    (no change)                                               |
+   *  |  .                                                              |
+   *  |  . . . . . . . . . . . . . . . . . . . . .                      |
+   *  |                                          .                      |
+   *  |                                          v                      |
+   *  |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------|
+   *  | (grow)                                                          |
+   *  |                                                                 |
+   *  |                                                                 |
+   *  \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE    |
+   *    (shrink)                                |                       |
+   *                                            |                       |
+   *                      /----------------------/                      |
+   *                      |                                             |
+   *                      v                                             |
+   *              STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . .  |
+   *                                            |                 .     |
+   *                                            |                 .     |
+   *                                            v                 v     v
+   *                                  STATE_SHRINK_OBJECT_MAP ---> <finish>
+   *
+   * @endverbatim
+   *
+   * The _OBJECT_MAP states are skipped if the object map isn't enabled.
+   * The state machine will immediately transition to _FINISHED if the
+   * requested size equals the current image size.
+   */
+  enum State {
+    STATE_FLUSH,
+    STATE_INVALIDATE_CACHE,
+    STATE_TRIM_IMAGE,
+    STATE_GROW_OBJECT_MAP,
+    STATE_UPDATE_HEADER,
+    STATE_SHRINK_OBJECT_MAP,
+    STATE_FINISHED
+  };
+
+  State m_state;
+  uint64_t m_original_size;
+  uint64_t m_new_size;
+  ProgressContext &m_prog_ctx;
+  uint64_t m_new_parent_overlap;
+
+  xlist<ResizeRequest *>::item m_xlist_item;
+
+  virtual bool should_complete(int r);
+
+  // shrink path: flush -> invalidate cache -> trim -> header -> object map
+  // grow path:   grow object map (if enabled) -> header
+  void send_flush();
+  void send_invalidate_cache();
+  void send_trim_image();
+  void send_grow_object_map();
+  // returns true when no object map resize was issued (nothing to wait for)
+  bool send_shrink_object_map();
+  void send_update_header();
+
+  // new overlap = MIN(new size, parent overlap), or 0 without a parent
+  void compute_parent_overlap();
+  // publish the new size (and, when shrinking, the overlap) to the ImageCtx
+  void update_size_and_overlap();
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc
new file mode 100644
index 00000000000..8d9546d15f7
--- /dev/null
+++ b/src/librbd/operation/TrimRequest.cc
@@ -0,0 +1,358 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/TrimRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AioObjectRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/ContextCompletion.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include <boost/bind.hpp> // NOTE(review): name lost in extraction; confirm
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+#include <boost/scope_exit.hpp> // NOTE(review): name lost in extraction; confirm
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::TrimRequest: "
+
+namespace librbd {
+namespace operation {
+
+class C_CopyupObject : public C_AsyncObjectThrottle<> {
+public:
+  C_CopyupObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+                 ::SnapContext snapc, uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle, *image_ctx), m_snapc(snapc),
+      m_object_no(object_no)
+  {
+  }
+
+  virtual int send() {
+    assert(m_image_ctx.owner_lock.is_locked());
+    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+           m_image_ctx.image_watcher->is_lock_owner());
+
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
+
+    AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no,
+                                              m_snapc, this);
+    req->send();
+    return 0;
+  }
+private:
+  ::SnapContext m_snapc;
+  uint64_t m_object_no;
+};
+
+class C_RemoveObject : public C_AsyncObjectThrottle<> {
+public:
+  C_RemoveObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+                 uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no)
+  {
+  }
+
+  virtual int send() {
+    assert(m_image_ctx.owner_lock.is_locked());
+    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+           m_image_ctx.image_watcher->is_lock_owner());
+    if (!m_image_ctx.object_map.object_may_exist(m_object_no)) {
+      return 1;
+    }
+
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
+
+    librados::AioCompletion *rados_completion =
+      librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
+    int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
+    assert(r == 0);
+    rados_completion->release();
+    return 0;
+  }
+
+private:
+  uint64_t m_object_no;
+};
+
+TrimRequest::TrimRequest(ImageCtx &image_ctx, Context *on_finish,
+                         uint64_t original_size, uint64_t new_size,
+                         ProgressContext &prog_ctx)
+  : AsyncRequest(image_ctx, on_finish), m_new_size(new_size),
+    m_prog_ctx(prog_ctx)
+{
+  uint64_t period = m_image_ctx.get_stripe_period();
+  uint64_t new_num_periods = ((m_new_size + period - 1) / period);
+  m_delete_off = MIN(new_num_periods * period, original_size);
+  // first object we can delete free and clear
+  m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
+  m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " trim image " << original_size << " -> "
+                 << m_new_size << " periods " << new_num_periods
+                 << " discard to offset " << m_delete_off
+                 << " delete objects " << m_delete_start
+                 << " to " << m_num_objects << dendl;
+}
+
+
+bool TrimRequest::should_complete(int r)
+{
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
+  if (r < 0) {
+    lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+
+  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+  switch (m_state) {
+  case STATE_COPYUP_OBJECTS:
+    ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
+    send_pre_remove();
+    break;
+
+  case STATE_PRE_REMOVE:
+    ldout(cct, 5) << " PRE_REMOVE" << dendl;
+    send_remove_objects();
+    break;
+
+  case STATE_REMOVE_OBJECTS:
+    ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
+    send_post_remove();
+    break;
+
+  case STATE_POST_REMOVE:
+    ldout(cct, 5) << " POST_REMOVE" << dendl; // label now matches the state
+    send_clean_boundary();
+    break;
+
+  case STATE_CLEAN_BOUNDARY:
+    ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
+    finish(0);
+    break;
+
+  case STATE_FINISHED:
+    ldout(cct, 5) << "FINISHED" << dendl;
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+void TrimRequest::send() {
+  send_copyup_objects();
+}
+
+void TrimRequest::send_copyup_objects() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  if (m_delete_start >= m_num_objects) {
+    send_clean_boundary();
+    return;
+  }
+
+  ::SnapContext snapc;
+  bool has_snapshots;
+  uint64_t parent_overlap;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+
+    snapc = m_image_ctx.snapc;
+    has_snapshots = !m_image_ctx.snaps.empty();
+    int r = m_image_ctx.get_parent_overlap(m_image_ctx.get_copyup_snap_id(),
+                                           &parent_overlap);
+    assert(r == 0);
+  }
+
+  // copyup is only required for portion of image that overlaps parent
+  uint64_t copyup_end = Striper::get_num_objects(m_image_ctx.layout,
+                                                 parent_overlap);
+  // TODO: protect against concurrent shrink and snap create?
+  if (copyup_end <= m_delete_start || !has_snapshots) {
+    send_pre_remove();
+    return;
+  }
+
+  uint64_t copyup_start = m_delete_start;
+  m_delete_start = copyup_end;
+
+  ldout(m_image_ctx.cct, 5) << this << " send_copyup_objects: "
+                            << " start object=" << copyup_start << ", "
+                            << " end object=" << copyup_end << dendl;
+  m_state = STATE_COPYUP_OBJECTS;
+
+  Context *ctx = create_callback_context();
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject>(),
+      boost::lambda::_1, &m_image_ctx, snapc, boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
+    copyup_end);
+  throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+void TrimRequest::send_remove_objects() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
+                            << " delete_start=" << m_delete_start
+                            << " num_objects=" << m_num_objects << dendl;
+  m_state = STATE_REMOVE_OBJECTS;
+
+  Context *ctx = create_callback_context();
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject>(),
+      boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start,
+    m_num_objects);
+  throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+void TrimRequest::send_pre_remove() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  if (m_delete_start >= m_num_objects) {
+    send_clean_boundary();
+    return;
+  }
+
+  bool remove_objects = false;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    if (!m_image_ctx.object_map.enabled()) {
+      remove_objects = true;
+    } else {
+      ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
+                                << " delete_start=" << m_delete_start
+                                << " num_objects=" << m_num_objects << dendl;
+      m_state = STATE_PRE_REMOVE;
+
+      assert(m_image_ctx.image_watcher->is_lock_owner());
+
+      // flag the objects as pending deletion
+      Context *ctx = create_callback_context();
+      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+      if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
+                                             OBJECT_PENDING, OBJECT_EXISTS,
+                                             ctx)) {
+        delete ctx;
+        remove_objects = true;
+      }
+    }
+  }
+
+  // avoid possible recursive lock attempts
+  if (remove_objects) {
+    // no object map update required
+    send_remove_objects();
+  }
+}
+
+void TrimRequest::send_post_remove() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  bool clean_boundary = false;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    if (!m_image_ctx.object_map.enabled()) {
+      clean_boundary = true;
+    } else {
+      ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
+                                << " delete_start=" << m_delete_start
+                                << " num_objects=" << m_num_objects << dendl;
+      m_state = STATE_POST_REMOVE;
+
+      assert(m_image_ctx.image_watcher->is_lock_owner());
+
+      // flag the pending objects as removed
+      Context *ctx = create_callback_context();
+      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+      if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
+                                             OBJECT_NONEXISTENT,
+                                             OBJECT_PENDING, ctx)) {
+        delete ctx;
+        clean_boundary = true;
+      }
+    }
+  }
+
+  // avoid possible recursive lock attempts
+  if (clean_boundary) {
+    // no object map update required
+    send_clean_boundary();
+  }
+}
+
+void TrimRequest::send_clean_boundary() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+  if (m_delete_off <= m_new_size) {
+    finish(0);
+    return;
+  }
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+  uint64_t delete_len = m_delete_off - m_new_size;
+  ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
+                            << " delete_off=" << m_delete_off
+                            << " length=" << delete_len << dendl;
+  m_state = STATE_CLEAN_BOUNDARY;
+
+  ::SnapContext snapc;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    snapc = m_image_ctx.snapc;
+  }
+
+  // discard the weird boundary
+  std::vector<ObjectExtent> extents;
+  Striper::file_to_extents(cct, m_image_ctx.format_string,
+                           &m_image_ctx.layout, m_new_size, delete_len, 0,
+                           extents);
+
+  ContextCompletion *completion =
+    new ContextCompletion(create_callback_context(), true);
+  for (vector<ObjectExtent>::iterator p = extents.begin();
+       p != extents.end(); ++p) {
+    ldout(cct, 20) << " ex " << *p << dendl;
+    Context *req_comp = new C_ContextCompletion(*completion);
+
+    AioObjectRequest *req;
+    if (p->offset == 0) {
+      req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc,
+                              req_comp);
+    } else {
+      req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno,
+                                  p->offset, snapc, req_comp);
+    }
+    req->send();
+  }
+  completion->finish_adding_requests();
+}
+
+void TrimRequest::finish(int r) {
+  m_state = STATE_FINISHED;
+  async_complete(r);
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/TrimRequest.h b/src/librbd/operation/TrimRequest.h
new file mode 100644
index 00000000000..d781c0c97b1
--- /dev/null
+++ b/src/librbd/operation/TrimRequest.h
@@ -0,0 +1,99 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+/**
+ * Asynchronous request that discards the image data between new_size and
+ * original_size: whole objects past the new size are removed and the
+ * partially covered boundary objects are trimmed or truncated.
+ */
+class TrimRequest : public AsyncRequest<>
+{
+public:
+  TrimRequest(ImageCtx &image_ctx, Context *on_finish,
+              uint64_t original_size, uint64_t new_size,
+              ProgressContext &prog_ctx);
+
+  virtual void send();
+
+protected:
+  /**
+   * Trim goes through the following state machine to remove whole objects,
+   * clean partially trimmed objects, and update the object map:
+   *
+   * @verbatim
+   *
+   *     <start> . . . . > STATE_FINISHED . . . . . . . . .
+   *      |   .                                           .
+   *      |   . . . . . . . . . . . .                     .
+   *      |                         .                     .
+   *      v                         .                     .
+   * STATE_COPYUP_OBJECTS . . .     .                     .
+   *      |                   .     .                     .
+   *      |                   .     .                     .
+   *      v                   v     v                     .
+   * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS           .
+   *                                |   .   .             .
+   *        /-----------------------/   .   . . . . . .   .
+   *        |                           .             .   .
+   *        v                           v             v   v
+   * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
+   *        .                                           ^
+   *        .                                           .
+   *        . . . . . . . . . . . . . . . . . . . . . . .
+   *
+   * @endverbatim
+   *
+   * The _COPYUP_OBJECTS state is skipped if the parent overlap does not extend
+   * past the new image size or the image does not have any snapshots.
+   * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+   * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects
+   * are removed.  The _CLEAN_BOUNDARY state is skipped if no boundary
+   * objects are cleaned.  The state machine will immediately transition
+   * to _FINISHED state if there are no bytes to trim.
+   */
+
+  enum State {
+    STATE_COPYUP_OBJECTS,
+    STATE_PRE_REMOVE,
+    STATE_REMOVE_OBJECTS,
+    STATE_POST_REMOVE,
+    STATE_CLEAN_BOUNDARY,
+    STATE_FINISHED
+  };
+
+  virtual bool should_complete(int r);
+
+  State m_state;
+
+private:
+  uint64_t m_delete_start; // first whole object to remove
+  uint64_t m_num_objects;  // object count of the image at original_size
+  uint64_t m_delete_off;   // end offset of the boundary range to discard
+  uint64_t m_new_size;
+  ProgressContext &m_prog_ctx;
+
+  void send_copyup_objects();
+  void send_remove_objects();
+  void send_pre_remove();
+  void send_post_remove();
+  void send_clean_boundary();
+  // mark the request finished and complete it asynchronously
+  void finish(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H