--- a/librbd/FlattenRequest.cc
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "librbd/FlattenRequest.h"
-#include "librbd/AioObjectRequest.h"
-#include "librbd/AsyncObjectThrottle.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/ObjectMap.h"
-#include "common/dout.h"
-#include "common/errno.h"
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::FlattenRequest: "
-
-namespace librbd {
-
-class C_FlattenObject : public C_AsyncObjectThrottle<> {
-public:
- C_FlattenObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
- uint64_t object_size, ::SnapContext snapc, uint64_t object_no)
- : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_size(object_size),
- m_snapc(snapc), m_object_no(object_no)
- {
- }
-
- virtual int send() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
-
- if (m_image_ctx.image_watcher->is_lock_supported() &&
- !m_image_ctx.image_watcher->is_lock_owner()) {
- ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
- return -ERESTART;
- }
-
- bufferlist bl;
- string oid = m_image_ctx.get_object_name(m_object_no);
- AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, oid, m_object_no, 0,
- bl, m_snapc, this);
- if (!req->has_parent()) {
- // stop early if the parent went away - it just means
- // another flatten finished first or the image was resized
- delete req;
- return 1;
- }
-
- req->send();
- return 0;
- }
-
-private:
- uint64_t m_object_size;
- ::SnapContext m_snapc;
- uint64_t m_object_no;
-};
-
-bool FlattenRequest::should_complete(int r) {
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
- if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) {
- lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
- return true;
- }
-
- RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
- switch (m_state) {
- case STATE_FLATTEN_OBJECTS:
- ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl;
- return send_update_header();
-
- case STATE_UPDATE_HEADER:
- ldout(cct, 5) << "UPDATE_HEADER" << dendl;
- return send_update_children();
-
- case STATE_UPDATE_CHILDREN:
- ldout(cct, 5) << "UPDATE_CHILDREN" << dendl;
- return true;
-
- default:
- lderr(cct) << "invalid state: " << m_state << dendl;
- assert(false);
- break;
- }
- return false;
-}
-
-void FlattenRequest::send() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 5) << this << " send" << dendl;
-
- m_state = STATE_FLATTEN_OBJECTS;
- AsyncObjectThrottle<>::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject>(),
- boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc,
- boost::lambda::_2));
- AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
- this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
- 0, m_overlap_objects);
- throttle->start_ops(m_image_ctx.concurrent_management_ops);
-}
-
-bool FlattenRequest::send_update_header() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
-
- ldout(cct, 5) << this << " send_update_header" << dendl;
- m_state = STATE_UPDATE_HEADER;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- {
- RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
- // stop early if the parent went away - it just means
- // another flatten finished first, so this one is useless.
- if (!m_image_ctx.parent) {
- ldout(cct, 5) << "image already flattened" << dendl;
- return true;
- }
- m_parent_spec = m_image_ctx.parent_md.spec;
- }
- m_ignore_enoent = true;
-
- // remove parent from this (base) image
- librados::ObjectWriteOperation op;
- if (m_image_ctx.image_watcher->is_lock_supported()) {
- m_image_ctx.image_watcher->assert_header_locked(&op);
- }
- cls_client::remove_parent(&op);
-
- librados::AioCompletion *rados_completion = create_callback_completion();
- int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
- rados_completion, &op);
- assert(r == 0);
- rados_completion->release();
- return false;
-}
-
-bool FlattenRequest::send_update_children() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- // if there are no snaps, remove from the children object as well
- // (if snapshots remain, they have their own parent info, and the child
- // will be removed when the last snap goes away)
- RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
- if ((m_image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
- !m_image_ctx.snaps.empty()) {
- return true;
- }
-
- ldout(cct, 2) << "removing child from children list..." << dendl;
- m_state = STATE_UPDATE_CHILDREN;
-
- librados::ObjectWriteOperation op;
- cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id);
-
- librados::AioCompletion *rados_completion = create_callback_completion();
- int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion,
- &op);
- assert(r == 0);
- rados_completion->release();
- return false;
-}
-
-} // namespace librbd
--- a/librbd/FlattenRequest.h
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_FLATTEN_REQUEST_H
-#define CEPH_LIBRBD_FLATTEN_REQUEST_H
-
-#include "librbd/AsyncRequest.h"
-#include "librbd/parent_types.h"
-#include "common/snap_types.h"
-
-namespace librbd {
-
-class ImageCtx;
-class ProgressContext;
-
-class FlattenRequest : public AsyncRequest<>
-{
-public:
- FlattenRequest(ImageCtx &image_ctx, Context *on_finish,
- uint64_t object_size, uint64_t overlap_objects,
- const ::SnapContext &snapc, ProgressContext &prog_ctx)
- : AsyncRequest(image_ctx, on_finish), m_object_size(object_size),
- m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx),
- m_ignore_enoent(false)
- {
- }
-
- virtual void send();
-
-protected:
- virtual bool should_complete(int r);
-
-private:
- /**
- * Flatten goes through the following state machine to copyup objects
- * from the parent image:
- *
- * @verbatim
- *
- * <start>
- * |
- * v
- * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . .
- * . | .
- * . | .
- * . v .
- * . STATE_UPDATE_CHILDREN .
- * . | .
- * . | .
- * . \---> <finish> < . .
- * . ^
- * . .
- * . . . . . . . . . . . . . . . . . . .
- *
- * @endverbatim
- *
- * The _UPDATE_CHILDREN state will be skipped if the image has one or
- * more snapshots. The _UPDATE_HEADER state will be skipped if the
- * image was concurrently flattened by another client.
- */
- enum State {
- STATE_FLATTEN_OBJECTS,
- STATE_UPDATE_HEADER,
- STATE_UPDATE_CHILDREN
- };
-
- uint64_t m_object_size;
- uint64_t m_overlap_objects;
- ::SnapContext m_snapc;
- ProgressContext &m_prog_ctx;
- State m_state;
-
- parent_spec m_parent_spec;
- bool m_ignore_enoent;
-
- bool send_update_header();
- bool send_update_children();
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_FLATTEN_REQUEST_H
#include "librbd/Journal.h"
#include "librbd/LibrbdAdminSocketHook.h"
#include "librbd/ObjectMap.h"
-#include "librbd/ResizeRequest.h"
+#include "librbd/operation/ResizeRequest.h"
#include <boost/bind.hpp>
class LibrbdAdminSocketHook;
class ImageWatcher;
class Journal;
+
+ namespace operation {
class ResizeRequest;
+ }
struct ImageCtx {
CephContext *cct;
atomic_t async_request_seq;
- xlist<ResizeRequest*> resize_reqs;
+ xlist<operation::ResizeRequest*> resize_reqs;
AioImageRequestWQ *aio_work_queue;
ContextWQ *op_work_queue;
librbd/ImageCtx.cc \
librbd/ImageWatcher.cc \
librbd/internal.cc \
- librbd/FlattenRequest.cc \
librbd/Journal.cc \
librbd/JournalReplay.cc \
librbd/LibrbdAdminSocketHook.cc \
librbd/LibrbdWriteback.cc \
librbd/ObjectMap.cc \
- librbd/RebuildObjectMapRequest.cc \
- librbd/ResizeRequest.cc \
- librbd/TrimRequest.cc
+ librbd/operation/FlattenRequest.cc \
+ librbd/operation/RebuildObjectMapRequest.cc \
+ librbd/operation/ResizeRequest.cc \
+ librbd/operation/TrimRequest.cc
noinst_LTLIBRARIES += librbd_internal.la
librbd_api_la_SOURCES = \
librbd/ImageCtx.h \
librbd/ImageWatcher.h \
librbd/internal.h \
- librbd/FlattenRequest.h \
librbd/Journal.h \
librbd/JournalReplay.h \
librbd/JournalTypes.h \
librbd/LibrbdWriteback.h \
librbd/ObjectMap.h \
librbd/parent_types.h \
- librbd/RebuildObjectMapRequest.h \
- librbd/ResizeRequest.h \
librbd/SnapInfo.h \
librbd/TaskFinisher.h \
- librbd/TrimRequest.h \
- librbd/WatchNotifyTypes.h
+ librbd/WatchNotifyTypes.h \
+ librbd/operation/FlattenRequest.h \
+ librbd/operation/RebuildObjectMapRequest.h \
+ librbd/operation/ResizeRequest.h \
+ librbd/operation/TrimRequest.h
endif # WITH_RBD
endif # WITH_RADOS
--- a/librbd/RebuildObjectMapRequest.cc
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "librbd/RebuildObjectMapRequest.h"
-#include "common/dout.h"
-#include "common/errno.h"
-#include "librbd/AsyncObjectThrottle.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
-#include "librbd/ResizeRequest.h"
-#include "librbd/TrimRequest.h"
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: "
-
-namespace librbd {
-
-namespace {
-
-class C_VerifyObject : public C_AsyncObjectThrottle<> {
-public:
- C_VerifyObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
- uint64_t snap_id, uint64_t object_no)
- : C_AsyncObjectThrottle(throttle, *image_ctx), m_snap_id(snap_id),
- m_object_no(object_no), m_oid(m_image_ctx.get_object_name(m_object_no))
- {
- m_io_ctx.dup(m_image_ctx.md_ctx);
- m_io_ctx.snap_set_read(CEPH_SNAPDIR);
- }
-
- virtual void complete(int r) {
- if (should_complete(r)) {
- ldout(m_image_ctx.cct, 20) << m_oid << " C_VerifyObject completed "
- << dendl;
- finish(r);
- delete this;
- }
- }
-
- virtual int send() {
- send_list_snaps();
- return 0;
- }
-
-private:
- librados::IoCtx m_io_ctx;
- uint64_t m_snap_id;
- uint64_t m_object_no;
- std::string m_oid;
-
- librados::snap_set_t m_snap_set;
- int m_snap_list_ret;
-
- bool should_complete(int r) {
- CephContext *cct = m_image_ctx.cct;
- if (r == 0) {
- r = m_snap_list_ret;
- }
- if (r < 0 && r != -ENOENT) {
- lderr(cct) << m_oid << " C_VerifyObject::should_complete: "
- << "encountered an error: " << cpp_strerror(r) << dendl;
- return true;
- }
-
- ldout(cct, 20) << m_oid << " C_VerifyObject::should_complete: " << " r="
- << r << dendl;
- return update_object_map(get_object_state());
- }
-
- void send_list_snaps() {
- assert(m_image_ctx.owner_lock.is_locked());
- ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::send_list_snaps"
- << dendl;
-
- librados::AioCompletion *comp = librados::Rados::aio_create_completion(
- this, NULL, rados_ctx_cb);
-
- librados::ObjectReadOperation op;
- op.list_snaps(&m_snap_set, &m_snap_list_ret);
-
- int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL);
- assert(r == 0);
- comp->release();
- }
-
- uint8_t get_object_state() {
- RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
- for (std::vector<librados::clone_info_t>::const_iterator r =
- m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) {
- librados::snap_t from_snap_id;
- librados::snap_t to_snap_id;
- if (r->cloneid == librados::SNAP_HEAD) {
- from_snap_id = next_valid_snap_id(m_snap_set.seq + 1);
- to_snap_id = librados::SNAP_HEAD;
- } else {
- from_snap_id = next_valid_snap_id(r->snaps[0]);
- to_snap_id = r->snaps[r->snaps.size()-1];
- }
-
- if (to_snap_id < m_snap_id) {
- continue;
- } else if (m_snap_id < from_snap_id) {
- break;
- }
-
- if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 &&
- from_snap_id != m_snap_id) {
- return OBJECT_EXISTS_CLEAN;
- }
- return OBJECT_EXISTS;
- }
- return OBJECT_NONEXISTENT;
- }
-
- uint64_t next_valid_snap_id(uint64_t snap_id) {
- assert(m_image_ctx.snap_lock.is_locked());
-
- std::map<librados::snap_t, SnapInfo>::iterator it =
- m_image_ctx.snap_info.lower_bound(snap_id);
- if (it == m_image_ctx.snap_info.end()) {
- return CEPH_NOSNAP;
- }
- return it->first;
- }
-
- bool update_object_map(uint8_t new_state) {
- RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
- CephContext *cct = m_image_ctx.cct;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- RWLock::WLocker l(m_image_ctx.object_map_lock);
- uint8_t state = m_image_ctx.object_map[m_object_no];
- if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT &&
- m_snap_id == CEPH_NOSNAP) {
- // might be writing object to OSD concurrently
- new_state = state;
- }
-
- if (new_state != state) {
- ldout(cct, 15) << m_oid << " C_VerifyObject::update_object_map "
- << static_cast<uint32_t>(state) << "->"
- << static_cast<uint32_t>(new_state) << dendl;
- m_image_ctx.object_map[m_object_no] = new_state;
- }
- return true;
- }
-};
-
-} // anonymous namespace
-
-
-void RebuildObjectMapRequest::send() {
- send_resize_object_map();
-}
-
-bool RebuildObjectMapRequest::should_complete(int r) {
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
-
- RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
- switch (m_state) {
- case STATE_RESIZE_OBJECT_MAP:
- ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl;
- if (r == -ESTALE && !m_attempted_trim) {
- // objects are still flagged as in-use -- delete them
- m_attempted_trim = true;
- send_trim_image();
- return false;
- } else if (r == 0) {
- send_verify_objects();
- }
- break;
-
- case STATE_TRIM_IMAGE:
- ldout(cct, 5) << "TRIM_IMAGE" << dendl;
- if (r == 0) {
- send_resize_object_map();
- }
- break;
-
- case STATE_VERIFY_OBJECTS:
- ldout(cct, 5) << "VERIFY_OBJECTS" << dendl;
- if (r == 0) {
- send_save_object_map();
- }
- break;
-
- case STATE_SAVE_OBJECT_MAP:
- ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl;
- if (r == 0) {
- send_update_header();
- }
- break;
- case STATE_UPDATE_HEADER:
- ldout(cct, 5) << "UPDATE_HEADER" << dendl;
- if (r == 0) {
- return true;
- }
- break;
-
- default:
- assert(false);
- break;
- }
-
- if (r < 0) {
- lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r)
- << dendl;
- return true;
- }
- return false;
-}
-
-void RebuildObjectMapRequest::send_resize_object_map() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
-
- uint64_t num_objects;
- uint64_t size;
- {
- RWLock::RLocker l(m_image_ctx.snap_lock);
- size = get_image_size();
- num_objects = Striper::get_num_objects(m_image_ctx.layout, size);
- }
-
- if (m_image_ctx.object_map.size() == num_objects) {
- send_verify_objects();
- return;
- }
-
- ldout(cct, 5) << this << " send_resize_object_map" << dendl;
- m_state = STATE_RESIZE_OBJECT_MAP;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
- m_image_ctx.object_map.aio_resize(size, OBJECT_NONEXISTENT,
- create_callback_context());
-}
-
-void RebuildObjectMapRequest::send_trim_image() {
- CephContext *cct = m_image_ctx.cct;
-
- RWLock::RLocker l(m_image_ctx.owner_lock);
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
- ldout(cct, 5) << this << " send_trim_image" << dendl;
- m_state = STATE_TRIM_IMAGE;
-
- uint64_t new_size;
- uint64_t orig_size;
- {
- RWLock::RLocker l(m_image_ctx.snap_lock);
- new_size = get_image_size();
- orig_size = m_image_ctx.get_object_size() *
- m_image_ctx.object_map.size();
- }
- TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
- orig_size, new_size, m_prog_ctx);
- req->send();
-}
-
-void RebuildObjectMapRequest::send_verify_objects() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
-
- uint64_t snap_id;
- uint64_t num_objects;
- {
- RWLock::RLocker l(m_image_ctx.snap_lock);
- snap_id = m_image_ctx.snap_id;
- num_objects = Striper::get_num_objects(m_image_ctx.layout,
- m_image_ctx.get_image_size(snap_id));
- }
-
- if (num_objects == 0) {
- send_save_object_map();
- return;
- }
-
- m_state = STATE_VERIFY_OBJECTS;
- ldout(cct, 5) << this << " send_verify_objects" << dendl;
-
- AsyncObjectThrottle<>::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObject>(),
- boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2));
- AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
- this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
- 0, num_objects);
- throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
-}
-
-void RebuildObjectMapRequest::send_save_object_map() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
-
- ldout(cct, 5) << this << " send_save_object_map" << dendl;
- m_state = STATE_SAVE_OBJECT_MAP;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
- m_image_ctx.object_map.aio_save(create_callback_context());
-}
-
-void RebuildObjectMapRequest::send_update_header() {
- assert(m_image_ctx.owner_lock.is_locked());
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl;
- m_state = STATE_UPDATE_HEADER;
-
- librados::ObjectWriteOperation op;
- if (m_image_ctx.image_watcher->is_lock_supported()) {
- m_image_ctx.image_watcher->assert_header_locked(&op);
- }
-
- uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID;
- cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags);
-
- librados::AioCompletion *comp = create_callback_completion();
- int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
- assert(r == 0);
- comp->release();
-
- RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
- m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false);
-}
-
-uint64_t RebuildObjectMapRequest::get_image_size() const {
- assert(m_image_ctx.snap_lock.is_locked());
- if (m_image_ctx.snap_id == CEPH_NOSNAP) {
- if (!m_image_ctx.resize_reqs.empty()) {
- return m_image_ctx.resize_reqs.front()->get_image_size();
- } else {
- return m_image_ctx.size;
- }
- }
- return m_image_ctx.get_image_size(m_image_ctx.snap_id);
-}
-
-} // namespace librbd
--- a/librbd/RebuildObjectMapRequest.h
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H
-#define CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H
-
-#include "include/int_types.h"
-#include "librbd/AsyncRequest.h"
-
-namespace librbd {
-
-class ImageCtx;
-class ProgressContext;
-
-class RebuildObjectMapRequest : public AsyncRequest<> {
-public:
-
- RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish,
- ProgressContext &prog_ctx)
- : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx),
- m_prog_ctx(prog_ctx), m_attempted_trim(false)
- {
- }
-
- virtual void send();
-
-protected:
- virtual bool should_complete(int r);
-
-private:
- /**
- * Rebuild object map goes through the following state machine to
- * verify per-object state:
- *
- * <start>
- * . | . . . . . . . . . .
- * . | . .
- * . v v .
- * . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE
- * . |
- * . v
- * . . . > STATE_VERIFY_OBJECTS
- * |
- * v
- * STATE_SAVE_OBJECT_MAP
- * |
- * v
- * STATE_UPDATE_HEADER
- *
- * The _RESIZE_OBJECT_MAP state will be skipped if the object map
- * is appropriately sized for the image. The _TRIM_IMAGE state will
- * only be hit if the resize failed due to an in-use object.
- */
- enum State {
- STATE_RESIZE_OBJECT_MAP,
- STATE_TRIM_IMAGE,
- STATE_VERIFY_OBJECTS,
- STATE_SAVE_OBJECT_MAP,
- STATE_UPDATE_HEADER
- };
-
- ImageCtx &m_image_ctx;
- ProgressContext &m_prog_ctx;
- State m_state;
- bool m_attempted_trim;
-
- void send_resize_object_map();
- void send_trim_image();
- void send_verify_objects();
- void send_save_object_map();
- void send_update_header();
-
- uint64_t get_image_size() const;
-
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H
--- a/librbd/ResizeRequest.cc
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#include "librbd/ResizeRequest.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
-#include "librbd/TrimRequest.h"
-#include "common/dout.h"
-#include "common/errno.h"
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::ResizeRequest: "
-
-namespace librbd
-{
-
-ResizeRequest::ResizeRequest(ImageCtx &image_ctx, Context *on_finish,
- uint64_t new_size,
- ProgressContext &prog_ctx)
- : AsyncRequest(image_ctx, on_finish),
- m_original_size(0), m_new_size(new_size),
- m_prog_ctx(prog_ctx), m_new_parent_overlap(0),
- m_xlist_item(this)
-{
-}
-
-ResizeRequest::~ResizeRequest() {
- ResizeRequest *next_req = NULL;
- {
- RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
- assert(m_xlist_item.remove_myself());
- if (!m_image_ctx.resize_reqs.empty()) {
- next_req = m_image_ctx.resize_reqs.front();
- }
- }
-
- if (next_req != NULL) {
- RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
- next_req->send();
- }
-}
-
-bool ResizeRequest::should_complete(int r) {
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
-
- if (r < 0) {
- lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
- return true;
- }
- if (m_state == STATE_FINISHED) {
- ldout(cct, 5) << "FINISHED" << dendl;
- return true;
- }
-
- RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
- switch (m_state) {
- case STATE_FLUSH:
- ldout(cct, 5) << "FLUSH" << dendl;
- send_invalidate_cache();
- break;
-
- case STATE_INVALIDATE_CACHE:
- ldout(cct, 5) << "INVALIDATE_CACHE" << dendl;
- send_trim_image();
- break;
-
- case STATE_TRIM_IMAGE:
- ldout(cct, 5) << "TRIM_IMAGE" << dendl;
- send_update_header();
- break;
-
- case STATE_GROW_OBJECT_MAP:
- ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
- send_update_header();
- break;
-
- case STATE_UPDATE_HEADER:
- ldout(cct, 5) << "UPDATE_HEADER" << dendl;
- if (send_shrink_object_map()) {
- update_size_and_overlap();
- return true;
- }
- break;
-
- case STATE_SHRINK_OBJECT_MAP:
- ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
- update_size_and_overlap();
- return true;
-
- default:
- lderr(cct) << "invalid state: " << m_state << dendl;
- assert(false);
- break;
- }
- return false;
-}
-
-void ResizeRequest::send() {
- assert(m_image_ctx.owner_lock.is_locked());
-
- {
- RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
- if (!m_xlist_item.is_on_list()) {
- m_image_ctx.resize_reqs.push_back(&m_xlist_item);
- if (m_image_ctx.resize_reqs.front() != this) {
- return;
- }
- }
-
- assert(m_image_ctx.resize_reqs.front() == this);
- m_original_size = m_image_ctx.size;
- compute_parent_overlap();
- }
-
- CephContext *cct = m_image_ctx.cct;
- if (is_canceled()) {
- complete(-ERESTART);
- } else if (m_original_size == m_new_size) {
- ldout(cct, 2) << this << " no change in size (" << m_original_size
- << " -> " << m_new_size << ")" << dendl;
- m_state = STATE_FINISHED;
- complete(0);
- } else if (m_new_size > m_original_size) {
- ldout(cct, 2) << this << " expanding image (" << m_original_size
- << " -> " << m_new_size << ")" << dendl;
- send_grow_object_map();
- } else {
- ldout(cct, 2) << this << " shrinking image (" << m_original_size
- << " -> " << m_new_size << ")" << dendl;
- send_flush();
- }
-}
-
-void ResizeRequest::send_flush() {
- ldout(m_image_ctx.cct, 5) << this << " send_flush: "
- << " original_size=" << m_original_size
- << " new_size=" << m_new_size << dendl;
- m_state = STATE_FLUSH;
-
- // with clipping adjusted, ensure that write / copy-on-read operations won't
- // (re-)create objects that we just removed. need async callback to ensure
- // we don't have cache_lock already held
- m_image_ctx.flush_async_operations(create_async_callback_context());
-}
-
-void ResizeRequest::send_invalidate_cache() {
- assert(m_image_ctx.owner_lock.is_locked());
- ldout(m_image_ctx.cct, 5) << this << " send_invalidate_cache: "
- << " original_size=" << m_original_size
- << " new_size=" << m_new_size << dendl;
- m_state = STATE_INVALIDATE_CACHE;
-
- // need to invalidate since we're deleting objects, and
- // ObjectCacher doesn't track non-existent objects
- m_image_ctx.invalidate_cache(create_callback_context());
-}
-
-void ResizeRequest::send_trim_image() {
- assert(m_image_ctx.owner_lock.is_locked());
- ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
- << " original_size=" << m_original_size
- << " new_size=" << m_new_size << dendl;
- m_state = STATE_TRIM_IMAGE;
-
- TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
- m_original_size, m_new_size, m_prog_ctx);
- req->send();
-}
-
-void ResizeRequest::send_grow_object_map() {
- assert(m_image_ctx.owner_lock.is_locked());
- if (!m_image_ctx.object_map.enabled()) {
- send_update_header();
- return;
- }
-
- ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
- << " original_size=" << m_original_size
- << " new_size=" << m_new_size << dendl;
- m_state = STATE_GROW_OBJECT_MAP;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
- create_callback_context());
-}
-
-bool ResizeRequest::send_shrink_object_map() {
- assert(m_image_ctx.owner_lock.is_locked());
- if (!m_image_ctx.object_map.enabled() || m_new_size > m_original_size) {
- return true;
- }
-
- ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
- << " original_size=" << m_original_size
- << " new_size=" << m_new_size << dendl;
- m_state = STATE_SHRINK_OBJECT_MAP;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
- create_callback_context());
- return false;
-}
-
-void ResizeRequest::send_update_header() {
- assert(m_image_ctx.owner_lock.is_locked());
-
- ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
- << " original_size=" << m_original_size
- << " new_size=" << m_new_size << dendl;
- m_state = STATE_UPDATE_HEADER;
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- librados::ObjectWriteOperation op;
- if (m_image_ctx.old_format) {
- // rewrite only the size field of the header
- // NOTE: format 1 image headers are not stored in fixed endian format
- bufferlist bl;
- bl.append(reinterpret_cast<const char*>(&m_new_size), sizeof(m_new_size));
- op.write(offsetof(rbd_obj_header_ondisk, image_size), bl);
- } else {
- if (m_image_ctx.image_watcher->is_lock_supported()) {
- m_image_ctx.image_watcher->assert_header_locked(&op);
- }
- cls_client::set_size(&op, m_new_size);
- }
-
- librados::AioCompletion *rados_completion = create_callback_completion();
- int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
- rados_completion, &op);
- assert(r == 0);
- rados_completion->release();
-}
-
-void ResizeRequest::compute_parent_overlap() {
- RWLock::RLocker l2(m_image_ctx.parent_lock);
- if (m_image_ctx.parent == NULL) {
- m_new_parent_overlap = 0;
- } else {
- m_new_parent_overlap = MIN(m_new_size, m_image_ctx.parent_md.overlap);
- }
-}
-
-void ResizeRequest::update_size_and_overlap() {
- RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
- m_image_ctx.size = m_new_size;
-
- RWLock::WLocker parent_locker(m_image_ctx.parent_lock);
- if (m_image_ctx.parent != NULL && m_new_size < m_original_size) {
- m_image_ctx.parent_md.overlap = m_new_parent_overlap;
- }
-}
-
-} // namespace librbd
--- a/librbd/ResizeRequest.h
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_RESIZE_REQUEST_H
-#define CEPH_LIBRBD_RESIZE_REQUEST_H
-
-#include "librbd/AsyncRequest.h"
-#include "include/xlist.h"
-
-namespace librbd
-{
-
-class ImageCtx;
-class ProgressContext;
-
-class ResizeRequest : public AsyncRequest<>
-{
-public:
- ResizeRequest(ImageCtx &image_ctx, Context *on_finish, uint64_t new_size,
- ProgressContext &prog_ctx);
- virtual ~ResizeRequest();
-
- virtual void send();
-
- inline bool shrinking() const {
- return m_new_size < m_original_size;
- }
-
- inline uint64_t get_image_size() const {
- return m_new_size;
- }
-
-private:
- /**
- * Resize goes through the following state machine to resize the image
- * and update the object map:
- *
- * @verbatim
- *
- * <start> -------------> STATE_FINISHED -----------------------------\
- * | . (no change) |
- * | . |
- * | . . . . . . . . . . . . . . . . . . . . . |
- * | . |
- * | v |
- * |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------|
- * | (grow) |
- * | |
- * | |
- * \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE |
- * (shrink) | |
- * | |
- * /----------------------/ |
- * | |
- * v |
- * STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . . |
- * | . |
- * | . |
- * v v v
- * STATE_SHRINK_OBJECT_MAP ---> <finish>
- *
- * @endverbatim
- *
- * The _OBJECT_MAP states are skipped if the object map isn't enabled.
- * The state machine will immediately transition to _FINISHED if there
- * are no objects to trim.
- */
- enum State {
- STATE_FLUSH,
- STATE_INVALIDATE_CACHE,
- STATE_TRIM_IMAGE,
- STATE_GROW_OBJECT_MAP,
- STATE_UPDATE_HEADER,
- STATE_SHRINK_OBJECT_MAP,
- STATE_FINISHED
- };
-
- State m_state;
- uint64_t m_original_size;
- uint64_t m_new_size;
- ProgressContext &m_prog_ctx;
- uint64_t m_new_parent_overlap;
-
- xlist<ResizeRequest *>::item m_xlist_item;
-
- virtual bool should_complete(int r);
-
- void send_flush();
- void send_invalidate_cache();
- void send_trim_image();
- void send_grow_object_map();
- bool send_shrink_object_map();
- void send_update_header();
-
- void compute_parent_overlap();
- void update_size_and_overlap();
-
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_RESIZE_REQUEST_H
--- a/librbd/TrimRequest.cc
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#include "librbd/TrimRequest.h"
-#include "librbd/AsyncObjectThrottle.h"
-#include "librbd/AioObjectRequest.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
-#include "common/ContextCompletion.h"
-#include "common/dout.h"
-#include "common/errno.h"
-#include "osdc/Striper.h"
-
-#include <boost/bind.hpp>
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
-#include <boost/scope_exit.hpp>
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::TrimRequest: "
-
-namespace librbd
-{
-
-class C_CopyupObject : public C_AsyncObjectThrottle<> {
-public:
- C_CopyupObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
- ::SnapContext snapc, uint64_t object_no)
- : C_AsyncObjectThrottle(throttle, *image_ctx), m_snapc(snapc),
- m_object_no(object_no)
- {
- }
-
- virtual int send() {
- assert(m_image_ctx.owner_lock.is_locked());
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- string oid = m_image_ctx.get_object_name(m_object_no);
- ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
-
- AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no,
- m_snapc, this);
- req->send();
- return 0;
- }
-private:
- ::SnapContext m_snapc;
- uint64_t m_object_no;
-};
-
-class C_RemoveObject : public C_AsyncObjectThrottle<> {
-public:
- C_RemoveObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
- uint64_t object_no)
- : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no)
- {
- }
-
- virtual int send() {
- assert(m_image_ctx.owner_lock.is_locked());
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
- if (!m_image_ctx.object_map.object_may_exist(m_object_no)) {
- return 1;
- }
-
- string oid = m_image_ctx.get_object_name(m_object_no);
- ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
-
- librados::AioCompletion *rados_completion =
- librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
- int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
- assert(r == 0);
- rados_completion->release();
- return 0;
- }
-
-private:
- uint64_t m_object_no;
-};
-
-TrimRequest::TrimRequest(ImageCtx &image_ctx, Context *on_finish,
- uint64_t original_size, uint64_t new_size,
- ProgressContext &prog_ctx)
- : AsyncRequest(image_ctx, on_finish), m_new_size(new_size),
- m_prog_ctx(prog_ctx)
-{
- uint64_t period = m_image_ctx.get_stripe_period();
- uint64_t new_num_periods = ((m_new_size + period - 1) / period);
- m_delete_off = MIN(new_num_periods * period, original_size);
- // first object we can delete free and clear
- m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
- m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
-
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 10) << this << " trim image " << original_size << " -> "
- << m_new_size << " periods " << new_num_periods
- << " discard to offset " << m_delete_off
- << " delete objects " << m_delete_start
- << " to " << m_num_objects << dendl;
-}
-
-
-bool TrimRequest::should_complete(int r)
-{
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
- if (r < 0) {
- lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
- return true;
- }
-
- RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
- switch (m_state) {
- case STATE_COPYUP_OBJECTS:
- ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
- send_pre_remove();
- break;
-
- case STATE_PRE_REMOVE:
- ldout(cct, 5) << " PRE_REMOVE" << dendl;
- send_remove_objects();
- break;
-
- case STATE_REMOVE_OBJECTS:
- ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
- send_post_remove();
- break;
-
- case STATE_POST_REMOVE:
- ldout(cct, 5) << " POST_OBJECTS" << dendl;
- send_clean_boundary();
- break;
-
- case STATE_CLEAN_BOUNDARY:
- ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
- finish(0);
- break;
-
- case STATE_FINISHED:
- ldout(cct, 5) << "FINISHED" << dendl;
- return true;
-
- default:
- lderr(cct) << "invalid state: " << m_state << dendl;
- assert(false);
- break;
- }
- return false;
-}
-
-void TrimRequest::send() {
- send_copyup_objects();
-}
-
-void TrimRequest::send_copyup_objects() {
- assert(m_image_ctx.owner_lock.is_locked());
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
-
- if (m_delete_start >= m_num_objects) {
- send_clean_boundary();
- return;
- }
-
- ::SnapContext snapc;
- bool has_snapshots;
- uint64_t parent_overlap;
- {
- RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
- RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
-
- snapc = m_image_ctx.snapc;
- has_snapshots = !m_image_ctx.snaps.empty();
- int r = m_image_ctx.get_parent_overlap(m_image_ctx.get_copyup_snap_id(),
- &parent_overlap);
- assert(r == 0);
- }
-
- // copyup is only required for portion of image that overlaps parent
- uint64_t copyup_end = Striper::get_num_objects(m_image_ctx.layout,
- parent_overlap);
- // TODO: protect against concurrent shrink and snap create?
- if (copyup_end <= m_delete_start || !has_snapshots) {
- send_pre_remove();
- return;
- }
-
- uint64_t copyup_start = m_delete_start;
- m_delete_start = copyup_end;
-
- ldout(m_image_ctx.cct, 5) << this << " send_copyup_objects: "
- << " start object=" << copyup_start << ", "
- << " end object=" << copyup_end << dendl;
- m_state = STATE_COPYUP_OBJECTS;
-
- Context *ctx = create_callback_context();
- AsyncObjectThrottle<>::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject>(),
- boost::lambda::_1, &m_image_ctx, snapc, boost::lambda::_2));
- AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
- this, m_image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
- copyup_end);
- throttle->start_ops(m_image_ctx.concurrent_management_ops);
-}
-
-void TrimRequest::send_remove_objects() {
- assert(m_image_ctx.owner_lock.is_locked());
-
- ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
- << " delete_start=" << m_delete_start
- << " num_objects=" << m_num_objects << dendl;
- m_state = STATE_REMOVE_OBJECTS;
-
- Context *ctx = create_callback_context();
- AsyncObjectThrottle<>::ContextFactory context_factory(
- boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject>(),
- boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
- AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
- this, m_image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start,
- m_num_objects);
- throttle->start_ops(m_image_ctx.concurrent_management_ops);
-}
-
-void TrimRequest::send_pre_remove() {
- assert(m_image_ctx.owner_lock.is_locked());
- if (m_delete_start >= m_num_objects) {
- send_clean_boundary();
- return;
- }
-
- bool remove_objects = false;
- {
- RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
- if (!m_image_ctx.object_map.enabled()) {
- remove_objects = true;
- } else {
- ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
- << " delete_start=" << m_delete_start
- << " num_objects=" << m_num_objects << dendl;
- m_state = STATE_PRE_REMOVE;
-
- assert(m_image_ctx.image_watcher->is_lock_owner());
-
- // flag the objects as pending deletion
- Context *ctx = create_callback_context();
- RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
- if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
- OBJECT_PENDING, OBJECT_EXISTS,
- ctx)) {
- delete ctx;
- remove_objects = true;
- }
- }
- }
-
- // avoid possible recursive lock attempts
- if (remove_objects) {
- // no object map update required
- send_remove_objects();
- }
-}
-
-void TrimRequest::send_post_remove() {
- assert(m_image_ctx.owner_lock.is_locked());
-
- bool clean_boundary = false;
- {
- RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
- if (!m_image_ctx.object_map.enabled()) {
- clean_boundary = true;
- } else {
- ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
- << " delete_start=" << m_delete_start
- << " num_objects=" << m_num_objects << dendl;
- m_state = STATE_POST_REMOVE;
-
- assert(m_image_ctx.image_watcher->is_lock_owner());
-
- // flag the pending objects as removed
- Context *ctx = create_callback_context();
- RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
- if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
- OBJECT_NONEXISTENT,
- OBJECT_PENDING, ctx)) {
- delete ctx;
- clean_boundary = true;
- }
- }
- }
-
- // avoid possible recursive lock attempts
- if (clean_boundary) {
- // no object map update required
- send_clean_boundary();
- }
-}
-
-void TrimRequest::send_clean_boundary() {
- assert(m_image_ctx.owner_lock.is_locked());
- CephContext *cct = m_image_ctx.cct;
- if (m_delete_off <= m_new_size) {
- finish(0);
- return;
- }
-
- // should have been canceled prior to releasing lock
- assert(!m_image_ctx.image_watcher->is_lock_supported() ||
- m_image_ctx.image_watcher->is_lock_owner());
- uint64_t delete_len = m_delete_off - m_new_size;
- ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
- << " delete_off=" << m_delete_off
- << " length=" << delete_len << dendl;
- m_state = STATE_CLEAN_BOUNDARY;
-
- ::SnapContext snapc;
- {
- RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
- snapc = m_image_ctx.snapc;
- }
-
- // discard the weird boundary
- std::vector<ObjectExtent> extents;
- Striper::file_to_extents(cct, m_image_ctx.format_string,
- &m_image_ctx.layout, m_new_size, delete_len, 0,
- extents);
-
- ContextCompletion *completion =
- new ContextCompletion(create_callback_context(), true);
- for (vector<ObjectExtent>::iterator p = extents.begin();
- p != extents.end(); ++p) {
- ldout(cct, 20) << " ex " << *p << dendl;
- Context *req_comp = new C_ContextCompletion(*completion);
-
- AioObjectRequest *req;
- if (p->offset == 0) {
- req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc,
- req_comp);
- } else {
- req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno,
- p->offset, snapc, req_comp);
- }
- req->send();
- }
- completion->finish_adding_requests();
-}
-
-void TrimRequest::finish(int r) {
- m_state = STATE_FINISHED;
- async_complete(r);
-}
-
-} // namespace librbd
--- a/librbd/TrimRequest.h
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_TRIM_REQUEST_H
-#define CEPH_LIBRBD_TRIM_REQUEST_H
-
-#include "librbd/AsyncRequest.h"
-
-namespace librbd
-{
-
-class ImageCtx;
-class ProgressContext;
-
-class TrimRequest : public AsyncRequest<>
-{
-public:
- TrimRequest(ImageCtx &image_ctx, Context *on_finish,
- uint64_t original_size, uint64_t new_size,
- ProgressContext &prog_ctx);
-
- virtual void send();
-
-protected:
- /**
- * Trim goes through the following state machine to remove whole objects,
- * clean partially trimmed objects, and update the object map:
- *
- * @verbatim
- *
- * <start> . . . . > STATE_FINISHED . . . . . . . . .
- * | . .
- * | . . . . . . . . . . . . .
- * | . .
- * v . .
- * STATE_COPYUP_OBJECTS . . . . .
- * | . . .
- * | . . .
- * v v v .
- * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS .
- * | . . .
- * /-----------------------/ . . . . . . . .
- * | . . .
- * v v v v
- * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
- * . ^
- * . .
- * . . . . . . . . . . . . . . . . . . . . . . .
- *
- * @endverbatim
- *
- * The _COPYUP_OBJECTS state is skipped if there is no parent overlap
- * within the new image size and the image does not have any snapshots.
- * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
- * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects
- * are removed. The _CLEAN_BOUNDARY state is skipped if no boundary
- * objects are cleaned. The state machine will immediately transition
- * to _FINISHED state if there are no bytes to trim.
- */
-
- enum State {
- STATE_COPYUP_OBJECTS,
- STATE_PRE_REMOVE,
- STATE_REMOVE_OBJECTS,
- STATE_POST_REMOVE,
- STATE_CLEAN_BOUNDARY,
- STATE_FINISHED
- };
-
- virtual bool should_complete(int r);
-
- State m_state;
-
-private:
- uint64_t m_delete_start;
- uint64_t m_num_objects;
- uint64_t m_delete_off;
- uint64_t m_new_size;
- ProgressContext &m_prog_ctx;
-
- void send_copyup_objects();
- void send_remove_objects();
- void send_pre_remove();
- void send_post_remove();
- void send_clean_boundary();
- void finish(int r);
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_TRIM_REQUEST_H
#include "librbd/ImageCtx.h"
#include "librbd/ImageWatcher.h"
#include "librbd/internal.h"
-#include "librbd/FlattenRequest.h"
#include "librbd/Journal.h"
#include "librbd/ObjectMap.h"
#include "librbd/parent_types.h"
-#include "librbd/RebuildObjectMapRequest.h"
-#include "librbd/ResizeRequest.h"
-#include "librbd/TrimRequest.h"
+#include "librbd/operation/FlattenRequest.h"
+#include "librbd/operation/RebuildObjectMapRequest.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/operation/TrimRequest.h"
#include "include/util.h"
#include <boost/bind.hpp>
C_SaferCond ctx;
ictx->snap_lock.get_read();
- TrimRequest *req = new TrimRequest(*ictx, &ctx, ictx->size, newsize,
- prog_ctx);
+ operation::TrimRequest *req = new operation::TrimRequest(
+ *ictx, &ctx, ictx->size, newsize, prog_ctx);
ictx->snap_lock.put_read();
req->send();
ProgressContext& prog_ctx)
{
assert(ictx->owner_lock.is_locked());
- ResizeRequest *req = new ResizeRequest(*ictx, ctx, new_size, prog_ctx);
+ operation::ResizeRequest *req = new operation::ResizeRequest(
+ *ictx, ctx, new_size, prog_ctx);
req->send();
}
overlap_objects = Striper::get_num_objects(ictx->layout, overlap);
}
- FlattenRequest *req = new FlattenRequest(*ictx, ctx, object_size,
- overlap_objects, snapc, prog_ctx);
+ operation::FlattenRequest *req = new operation::FlattenRequest(
+ *ictx, ctx, object_size, overlap_objects, snapc, prog_ctx);
req->send();
return 0;
}
return r;
}
- RebuildObjectMapRequest *req = new RebuildObjectMapRequest(*ictx, ctx,
- prog_ctx);
+ operation::RebuildObjectMapRequest *req =
+ new operation::RebuildObjectMapRequest(*ictx, ctx, prog_ctx);
req->send();
return 0;
}
--- /dev/null
+++ b/librbd/operation/FlattenRequest.cc
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/FlattenRequest.h"
+#include "librbd/AioObjectRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/ObjectMap.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::FlattenRequest: "
+
+namespace librbd {
+namespace operation {
+
+class C_FlattenObject : public C_AsyncObjectThrottle<> {
+public:
+ C_FlattenObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+ uint64_t object_size, ::SnapContext snapc, uint64_t object_no)
+ : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_size(object_size),
+ m_snapc(snapc), m_object_no(object_no)
+ {
+ }
+
+ virtual int send() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+
+ if (m_image_ctx.image_watcher->is_lock_supported() &&
+ !m_image_ctx.image_watcher->is_lock_owner()) {
+ ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
+ return -ERESTART;
+ }
+
+ bufferlist bl;
+ string oid = m_image_ctx.get_object_name(m_object_no);
+ AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, oid, m_object_no, 0,
+ bl, m_snapc, this);
+ if (!req->has_parent()) {
+ // stop early if the parent went away - it just means
+ // another flatten finished first or the image was resized
+ delete req;
+ return 1;
+ }
+
+ req->send();
+ return 0;
+ }
+
+private:
+ uint64_t m_object_size;
+ ::SnapContext m_snapc;
+ uint64_t m_object_no;
+};
+
+bool FlattenRequest::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+ if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) {
+ lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+ switch (m_state) {
+ case STATE_FLATTEN_OBJECTS:
+ ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl;
+ return send_update_header();
+
+ case STATE_UPDATE_HEADER:
+ ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+ return send_update_children();
+
+ case STATE_UPDATE_CHILDREN:
+ ldout(cct, 5) << "UPDATE_CHILDREN" << dendl;
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+}
+
+void FlattenRequest::send() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " send" << dendl;
+
+ m_state = STATE_FLATTEN_OBJECTS;
+ AsyncObjectThrottle<>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject>(),
+ boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc,
+ boost::lambda::_2));
+ AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+ this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
+ 0, m_overlap_objects);
+ throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+bool FlattenRequest::send_update_header() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 5) << this << " send_update_header" << dendl;
+ m_state = STATE_UPDATE_HEADER;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ {
+ RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+ // stop early if the parent went away - it just means
+ // another flatten finished first, so this one is useless.
+ if (!m_image_ctx.parent) {
+ ldout(cct, 5) << "image already flattened" << dendl;
+ return true;
+ }
+ m_parent_spec = m_image_ctx.parent_md.spec;
+ }
+ m_ignore_enoent = true;
+
+ // remove parent from this (base) image
+ librados::ObjectWriteOperation op;
+ if (m_image_ctx.image_watcher->is_lock_supported()) {
+ m_image_ctx.image_watcher->assert_header_locked(&op);
+ }
+ cls_client::remove_parent(&op);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+ return false;
+}
+
+bool FlattenRequest::send_update_children() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ // if there are no snaps, remove from the children object as well
+ // (if snapshots remain, they have their own parent info, and the child
+ // will be removed when the last snap goes away)
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ if ((m_image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
+ !m_image_ctx.snaps.empty()) {
+ return true;
+ }
+
+ ldout(cct, 2) << "removing child from children list..." << dendl;
+ m_state = STATE_UPDATE_CHILDREN;
+
+ librados::ObjectWriteOperation op;
+ cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id);
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion,
+ &op);
+ assert(r == 0);
+ rados_completion->release();
+ return false;
+}
+
+} // namespace operation
+} // namespace librbd
--- /dev/null
+++ b/librbd/operation/FlattenRequest.h
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "librbd/parent_types.h"
+#include "common/snap_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+class FlattenRequest : public AsyncRequest<>
+{
+public:
+ FlattenRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t object_size, uint64_t overlap_objects,
+ const ::SnapContext &snapc, ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish), m_object_size(object_size),
+ m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx),
+ m_ignore_enoent(false)
+ {
+ }
+
+ virtual void send();
+
+protected:
+ virtual bool should_complete(int r);
+
+private:
+ /**
+ * Flatten goes through the following state machine to copyup objects
+ * from the parent image:
+ *
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . .
+ * . | .
+ * . | .
+ * . v .
+ * . STATE_UPDATE_CHILDREN .
+ * . | .
+ * . | .
+ * . \---> <finish> < . .
+ * . ^
+ * . .
+ * . . . . . . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ *
+ * The _UPDATE_CHILDREN state will be skipped if the image has one or
+ * more snapshots. The _UPDATE_HEADER state will be skipped if the
+ * image was concurrently flattened by another client.
+ */
+ enum State {
+ STATE_FLATTEN_OBJECTS,
+ STATE_UPDATE_HEADER,
+ STATE_UPDATE_CHILDREN
+ };
+
+ uint64_t m_object_size;
+ uint64_t m_overlap_objects;
+ ::SnapContext m_snapc;
+ ProgressContext &m_prog_ctx;
+ State m_state;
+
+ parent_spec m_parent_spec;
+ bool m_ignore_enoent;
+
+ bool send_update_header();
+ bool send_update_children();
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
--- /dev/null
+++ b/librbd/operation/RebuildObjectMapRequest.cc
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/RebuildObjectMapRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/operation/TrimRequest.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+class C_VerifyObject : public C_AsyncObjectThrottle<> {
+public:
+ C_VerifyObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+ uint64_t snap_id, uint64_t object_no)
+ : C_AsyncObjectThrottle(throttle, *image_ctx), m_snap_id(snap_id),
+ m_object_no(object_no), m_oid(m_image_ctx.get_object_name(m_object_no))
+ {
+ m_io_ctx.dup(m_image_ctx.md_ctx);
+ m_io_ctx.snap_set_read(CEPH_SNAPDIR);
+ }
+
+ virtual void complete(int r) {
+ if (should_complete(r)) {
+ ldout(m_image_ctx.cct, 20) << m_oid << " C_VerifyObject completed "
+ << dendl;
+ finish(r);
+ delete this;
+ }
+ }
+
+ virtual int send() {
+ send_list_snaps();
+ return 0;
+ }
+
+private:
+ librados::IoCtx m_io_ctx;
+ uint64_t m_snap_id;
+ uint64_t m_object_no;
+ std::string m_oid;
+
+ librados::snap_set_t m_snap_set;
+ int m_snap_list_ret;
+
+ bool should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ if (r == 0) {
+ r = m_snap_list_ret;
+ }
+ if (r < 0 && r != -ENOENT) {
+ lderr(cct) << m_oid << " C_VerifyObject::should_complete: "
+ << "encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ ldout(cct, 20) << m_oid << " C_VerifyObject::should_complete: " << " r="
+ << r << dendl;
+ return update_object_map(get_object_state());
+ }
+
+ void send_list_snaps() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::send_list_snaps"
+ << dendl;
+
+ librados::AioCompletion *comp = librados::Rados::aio_create_completion(
+ this, NULL, rados_ctx_cb);
+
+ librados::ObjectReadOperation op;
+ op.list_snaps(&m_snap_set, &m_snap_list_ret);
+
+ int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL);
+ assert(r == 0);
+ comp->release();
+ }
+
+ uint8_t get_object_state() {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ for (std::vector<librados::clone_info_t>::const_iterator r =
+ m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) {
+ librados::snap_t from_snap_id;
+ librados::snap_t to_snap_id;
+ if (r->cloneid == librados::SNAP_HEAD) {
+ from_snap_id = next_valid_snap_id(m_snap_set.seq + 1);
+ to_snap_id = librados::SNAP_HEAD;
+ } else {
+ from_snap_id = next_valid_snap_id(r->snaps[0]);
+ to_snap_id = r->snaps[r->snaps.size()-1];
+ }
+
+ if (to_snap_id < m_snap_id) {
+ continue;
+ } else if (m_snap_id < from_snap_id) {
+ break;
+ }
+
+ if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 &&
+ from_snap_id != m_snap_id) {
+ return OBJECT_EXISTS_CLEAN;
+ }
+ return OBJECT_EXISTS;
+ }
+ return OBJECT_NONEXISTENT;
+ }
+
+ uint64_t next_valid_snap_id(uint64_t snap_id) {
+ assert(m_image_ctx.snap_lock.is_locked());
+
+ std::map<librados::snap_t, SnapInfo>::iterator it =
+ m_image_ctx.snap_info.lower_bound(snap_id);
+ if (it == m_image_ctx.snap_info.end()) {
+ return CEPH_NOSNAP;
+ }
+ return it->first;
+ }
+
+ bool update_object_map(uint8_t new_state) {
+ RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+ CephContext *cct = m_image_ctx.cct;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ RWLock::WLocker l(m_image_ctx.object_map_lock);
+ uint8_t state = m_image_ctx.object_map[m_object_no];
+ if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT &&
+ m_snap_id == CEPH_NOSNAP) {
+ // might be writing object to OSD concurrently
+ new_state = state;
+ }
+
+ if (new_state != state) {
+ ldout(cct, 15) << m_oid << " C_VerifyObject::update_object_map "
+ << static_cast<uint32_t>(state) << "->"
+ << static_cast<uint32_t>(new_state) << dendl;
+ m_image_ctx.object_map[m_object_no] = new_state;
+ }
+ return true;
+ }
+};
+
+} // anonymous namespace
+
+
+void RebuildObjectMapRequest::send() {
+ send_resize_object_map();
+}
+
+bool RebuildObjectMapRequest::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+ RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+ switch (m_state) {
+ case STATE_RESIZE_OBJECT_MAP:
+ ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl;
+ if (r == -ESTALE && !m_attempted_trim) {
+ // objects are still flagged as in-use -- delete them
+ m_attempted_trim = true;
+ send_trim_image();
+ return false;
+ } else if (r == 0) {
+ send_verify_objects();
+ }
+ break;
+
+ case STATE_TRIM_IMAGE:
+ ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+ if (r == 0) {
+ send_resize_object_map();
+ }
+ break;
+
+ case STATE_VERIFY_OBJECTS:
+ ldout(cct, 5) << "VERIFY_OBJECTS" << dendl;
+ if (r == 0) {
+ send_save_object_map();
+ }
+ break;
+
+ case STATE_SAVE_OBJECT_MAP:
+ ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl;
+ if (r == 0) {
+ send_update_header();
+ }
+ break;
+

+ case STATE_UPDATE_HEADER:
+ ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+ if (r == 0) {
+ return true;
+ }
+ break;
+
+ default:
+ assert(false);
+ break;
+ }
+
+ if (r < 0) {
+ lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r)
+ << dendl;
+ return true;
+ }
+ return false;
+}
+
+void RebuildObjectMapRequest::send_resize_object_map() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+
+ uint64_t num_objects;
+ uint64_t size;
+ {
+ RWLock::RLocker l(m_image_ctx.snap_lock);
+ size = get_image_size();
+ num_objects = Striper::get_num_objects(m_image_ctx.layout, size);
+ }
+
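+ // object map is already sized correctly for the image -- skip the resize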
+ if (m_image_ctx.object_map.size() == num_objects) {
+ send_verify_objects();
+ return;
+ }
+
+ ldout(cct, 5) << this << " send_resize_object_map" << dendl;
+ m_state = STATE_RESIZE_OBJECT_MAP;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+ m_image_ctx.object_map.aio_resize(size, OBJECT_NONEXISTENT,
+ create_callback_context());
+}
+
+void RebuildObjectMapRequest::send_trim_image() {
+ CephContext *cct = m_image_ctx.cct;
+
+ RWLock::RLocker l(m_image_ctx.owner_lock);
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+ ldout(cct, 5) << this << " send_trim_image" << dendl;
+ m_state = STATE_TRIM_IMAGE;
+
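+ // trim away any objects beyond the current image size that the stale
+ // object map still references, then retry the object map resize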
+ uint64_t new_size;
+ uint64_t orig_size;
+ {
+ RWLock::RLocker l(m_image_ctx.snap_lock);
+ new_size = get_image_size();
+ orig_size = m_image_ctx.get_object_size() *
+ m_image_ctx.object_map.size();
+ }
+ TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
+ orig_size, new_size, m_prog_ctx);
+ req->send();
+}
+
+void RebuildObjectMapRequest::send_verify_objects() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+
+ uint64_t snap_id;
+ uint64_t num_objects;
+ {
+ RWLock::RLocker l(m_image_ctx.snap_lock);
+ snap_id = m_image_ctx.snap_id;
+ num_objects = Striper::get_num_objects(m_image_ctx.layout,
+ m_image_ctx.get_image_size(snap_id));
+ }
+
+ if (num_objects == 0) {
+ send_save_object_map();
+ return;
+ }
+
+ m_state = STATE_VERIFY_OBJECTS;
+ ldout(cct, 5) << this << " send_verify_objects" << dendl;
+
+ AsyncObjectThrottle<>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObject>(),
+ boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2));
+ AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+ this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
+ 0, num_objects);
+ throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+void RebuildObjectMapRequest::send_save_object_map() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+
+ ldout(cct, 5) << this << " send_save_object_map" << dendl;
+ m_state = STATE_SAVE_OBJECT_MAP;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+ m_image_ctx.object_map.aio_save(create_callback_context());
+}
+
+void RebuildObjectMapRequest::send_update_header() {
+ assert(m_image_ctx.owner_lock.is_locked());
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl;
+ m_state = STATE_UPDATE_HEADER;
+
+ librados::ObjectWriteOperation op;
+ if (m_image_ctx.image_watcher->is_lock_supported()) {
+ m_image_ctx.image_watcher->assert_header_locked(&op);
+ }
+
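+ // clear the invalid flags now that the rebuilt object map has been saved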
+ uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID;
+ cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags);
+
+ librados::AioCompletion *comp = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
+ assert(r == 0);
+ comp->release();
+
+ RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+ m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false);
+}
+
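+// if a resize request is queued, rebuild against its target size so the
+// object map matches the image once the resize completes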
+uint64_t RebuildObjectMapRequest::get_image_size() const {
+ assert(m_image_ctx.snap_lock.is_locked());
+ if (m_image_ctx.snap_id == CEPH_NOSNAP) {
+ if (!m_image_ctx.resize_reqs.empty()) {
+ return m_image_ctx.resize_reqs.front()->get_image_size();
+ } else {
+ return m_image_ctx.size;
+ }
+ }
+ return m_image_ctx.get_image_size(m_image_ctx.snap_id);
+}
+
+} // namespace operation
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/AsyncRequest.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+class RebuildObjectMapRequest : public AsyncRequest<> {
+public:
+
+ RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish,
+ ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx),
+ m_prog_ctx(prog_ctx), m_attempted_trim(false)
+ {
+ }
+
+ virtual void send();
+
+protected:
+ virtual bool should_complete(int r);
+
+private:
+ /**
+ * Rebuild object map goes through the following state machine to
+ * verify per-object state:
+ *
+ * @verbatim
+ *
+ * <start>
+ * . | . . . . . . . . . .
+ * . | . .
+ * . v v .
+ * . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE
+ * . |
+ * . v
+ * . . . > STATE_VERIFY_OBJECTS
+ * |
+ * v
+ * STATE_SAVE_OBJECT_MAP
+ * |
+ * v
+ * STATE_UPDATE_HEADER
+ *
+ * @endverbatim
+ *
+ * The _RESIZE_OBJECT_MAP state will be skipped if the object map
+ * is appropriately sized for the image. The _TRIM_IMAGE state will
+ * only be hit if the resize failed due to an in-use object.
+ */
+ enum State {
+ STATE_RESIZE_OBJECT_MAP,
+ STATE_TRIM_IMAGE,
+ STATE_VERIFY_OBJECTS,
+ STATE_SAVE_OBJECT_MAP,
+ STATE_UPDATE_HEADER
+ };
+
+ ImageCtx &m_image_ctx;
+ ProgressContext &m_prog_ctx;
+ State m_state;
+ bool m_attempted_trim;
+
+ void send_resize_object_map();
+ void send_trim_image();
+ void send_verify_objects();
+ void send_save_object_map();
+ void send_update_header();
+
+ uint64_t get_image_size() const;
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/TrimRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ResizeRequest: "
+
+namespace librbd {
+namespace operation {
+
+ResizeRequest::ResizeRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t new_size,
+ ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish),
+ m_original_size(0), m_new_size(new_size),
+ m_prog_ctx(prog_ctx), m_new_parent_overlap(0),
+ m_xlist_item(this)
+{
+}
+
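+// resize requests are serialized: when this request is destroyed, kick off
+// the next request waiting in the queue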
+ResizeRequest::~ResizeRequest() {
+ ResizeRequest *next_req = NULL;
+ {
+ RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+ assert(m_xlist_item.remove_myself());
+ if (!m_image_ctx.resize_reqs.empty()) {
+ next_req = m_image_ctx.resize_reqs.front();
+ }
+ }
+
+ if (next_req != NULL) {
+ RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+ next_req->send();
+ }
+}
+
+bool ResizeRequest::should_complete(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+ if (m_state == STATE_FINISHED) {
+ ldout(cct, 5) << "FINISHED" << dendl;
+ return true;
+ }
+
+ RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+ switch (m_state) {
+ case STATE_FLUSH:
+ ldout(cct, 5) << "FLUSH" << dendl;
+ send_invalidate_cache();
+ break;
+
+ case STATE_INVALIDATE_CACHE:
+ ldout(cct, 5) << "INVALIDATE_CACHE" << dendl;
+ send_trim_image();
+ break;
+
+ case STATE_TRIM_IMAGE:
+ ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+ send_update_header();
+ break;
+
+ case STATE_GROW_OBJECT_MAP:
+ ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
+ send_update_header();
+ break;
+
+ case STATE_UPDATE_HEADER:
+ ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+ if (send_shrink_object_map()) {
+ update_size_and_overlap();
+ return true;
+ }
+ break;
+
+ case STATE_SHRINK_OBJECT_MAP:
+ ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
+ update_size_and_overlap();
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+}
+
+void ResizeRequest::send() {
+ assert(m_image_ctx.owner_lock.is_locked());
+
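+ // queue this request; only the request at the head of resize_reqs runs
+ // now -- later requests are started by their predecessor's destructor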
+ {
+ RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+ if (!m_xlist_item.is_on_list()) {
+ m_image_ctx.resize_reqs.push_back(&m_xlist_item);
+ if (m_image_ctx.resize_reqs.front() != this) {
+ return;
+ }
+ }
+
+ assert(m_image_ctx.resize_reqs.front() == this);
+ m_original_size = m_image_ctx.size;
+ compute_parent_overlap();
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ if (is_canceled()) {
+ complete(-ERESTART);
+ } else if (m_original_size == m_new_size) {
+ ldout(cct, 2) << this << " no change in size (" << m_original_size
+ << " -> " << m_new_size << ")" << dendl;
+ m_state = STATE_FINISHED;
+ complete(0);
+ } else if (m_new_size > m_original_size) {
+ ldout(cct, 2) << this << " expanding image (" << m_original_size
+ << " -> " << m_new_size << ")" << dendl;
+ send_grow_object_map();
+ } else {
+ ldout(cct, 2) << this << " shrinking image (" << m_original_size
+ << " -> " << m_new_size << ")" << dendl;
+ send_flush();
+ }
+}
+
+void ResizeRequest::send_flush() {
+ ldout(m_image_ctx.cct, 5) << this << " send_flush: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_FLUSH;
+
+ // with clipping adjusted, ensure that write / copy-on-read operations won't
+ // (re-)create objects that we just removed. need async callback to ensure
+ // we don't have cache_lock already held
+ m_image_ctx.flush_async_operations(create_async_callback_context());
+}
+
+void ResizeRequest::send_invalidate_cache() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ ldout(m_image_ctx.cct, 5) << this << " send_invalidate_cache: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_INVALIDATE_CACHE;
+
+ // need to invalidate since we're deleting objects, and
+ // ObjectCacher doesn't track non-existent objects
+ m_image_ctx.invalidate_cache(create_callback_context());
+}
+
+void ResizeRequest::send_trim_image() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_TRIM_IMAGE;
+
+ TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
+ m_original_size, m_new_size, m_prog_ctx);
+ req->send();
+}
+
+void ResizeRequest::send_grow_object_map() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ if (!m_image_ctx.object_map.enabled()) {
+ send_update_header();
+ return;
+ }
+
+ ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_GROW_OBJECT_MAP;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
+ create_callback_context());
+}
+
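+// returns true if no object map shrink is required (the caller can finish
+// immediately), false once the asynchronous shrink has been issued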
+bool ResizeRequest::send_shrink_object_map() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ if (!m_image_ctx.object_map.enabled() || m_new_size > m_original_size) {
+ return true;
+ }
+
+ ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_SHRINK_OBJECT_MAP;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
+ create_callback_context());
+ return false;
+}
+
+void ResizeRequest::send_update_header() {
+ assert(m_image_ctx.owner_lock.is_locked());
+
+ ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
+ << " original_size=" << m_original_size
+ << " new_size=" << m_new_size << dendl;
+ m_state = STATE_UPDATE_HEADER;
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ librados::ObjectWriteOperation op;
+ if (m_image_ctx.old_format) {
+ // rewrite only the size field of the header
+ // NOTE: format 1 image headers are not stored in fixed endian format
+ bufferlist bl;
+ bl.append(reinterpret_cast<const char*>(&m_new_size), sizeof(m_new_size));
+ op.write(offsetof(rbd_obj_header_ondisk, image_size), bl);
+ } else {
+ if (m_image_ctx.image_watcher->is_lock_supported()) {
+ m_image_ctx.image_watcher->assert_header_locked(&op);
+ }
+ cls_client::set_size(&op, m_new_size);
+ }
+
+ librados::AioCompletion *rados_completion = create_callback_completion();
+ int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+ rados_completion, &op);
+ assert(r == 0);
+ rados_completion->release();
+}
+
+void ResizeRequest::compute_parent_overlap() {
+ RWLock::RLocker l2(m_image_ctx.parent_lock);
+ if (m_image_ctx.parent == NULL) {
+ m_new_parent_overlap = 0;
+ } else {
+ m_new_parent_overlap = MIN(m_new_size, m_image_ctx.parent_md.overlap);
+ }
+}
+
+void ResizeRequest::update_size_and_overlap() {
+ RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+ m_image_ctx.size = m_new_size;
+
+ RWLock::WLocker parent_locker(m_image_ctx.parent_lock);
+ if (m_image_ctx.parent != NULL && m_new_size < m_original_size) {
+ m_image_ctx.parent_md.overlap = m_new_parent_overlap;
+ }
+}
+
+} // namespace operation
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "include/xlist.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+class ResizeRequest : public AsyncRequest<>
+{
+public:
+ ResizeRequest(ImageCtx &image_ctx, Context *on_finish, uint64_t new_size,
+ ProgressContext &prog_ctx);
+ virtual ~ResizeRequest();
+
+ virtual void send();
+
+ inline bool shrinking() const {
+ return m_new_size < m_original_size;
+ }
+
+ inline uint64_t get_image_size() const {
+ return m_new_size;
+ }
+
+private:
+ /**
+ * Resize goes through the following state machine to resize the image
+ * and update the object map:
+ *
+ * @verbatim
+ *
+ * <start> -------------> STATE_FINISHED -----------------------------\
+ * | . (no change) |
+ * | . |
+ * | . . . . . . . . . . . . . . . . . . . . . |
+ * | . |
+ * | v |
+ * |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------|
+ * | (grow) |
+ * | |
+ * | |
+ * \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE |
+ * (shrink) | |
+ * | |
+ * /----------------------/ |
+ * | |
+ * v |
+ * STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . . |
+ * | . |
+ * | . |
+ * v v v
+ * STATE_SHRINK_OBJECT_MAP ---> <finish>
+ *
+ * @endverbatim
+ *
+ * The _OBJECT_MAP states are skipped if the object map isn't enabled.
+ * The state machine will immediately transition to _FINISHED if the
+ * new size matches the original image size.
+ */
+ enum State {
+ STATE_FLUSH,
+ STATE_INVALIDATE_CACHE,
+ STATE_TRIM_IMAGE,
+ STATE_GROW_OBJECT_MAP,
+ STATE_UPDATE_HEADER,
+ STATE_SHRINK_OBJECT_MAP,
+ STATE_FINISHED
+ };
+
+ State m_state;
+ uint64_t m_original_size;
+ uint64_t m_new_size;
+ ProgressContext &m_prog_ctx;
+ uint64_t m_new_parent_overlap;
+
+ xlist<ResizeRequest *>::item m_xlist_item;
+
+ virtual bool should_complete(int r);
+
+ void send_flush();
+ void send_invalidate_cache();
+ void send_trim_image();
+ void send_grow_object_map();
+ bool send_shrink_object_map();
+ void send_update_header();
+
+ void compute_parent_overlap();
+ void update_size_and_overlap();
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/TrimRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AioObjectRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/ContextCompletion.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include <boost/bind.hpp>
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::TrimRequest: "
+
+namespace librbd {
+namespace operation {
+
+class C_CopyupObject : public C_AsyncObjectThrottle<> {
+public:
+ C_CopyupObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+ ::SnapContext snapc, uint64_t object_no)
+ : C_AsyncObjectThrottle(throttle, *image_ctx), m_snapc(snapc),
+ m_object_no(object_no)
+ {
+ }
+
+ virtual int send() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ string oid = m_image_ctx.get_object_name(m_object_no);
+ ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
+
+ AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no,
+ m_snapc, this);
+ req->send();
+ return 0;
+ }
+private:
+ ::SnapContext m_snapc;
+ uint64_t m_object_no;
+};
+
+class C_RemoveObject : public C_AsyncObjectThrottle<> {
+public:
+ C_RemoveObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+ uint64_t object_no)
+ : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no)
+ {
+ }
+
+ virtual int send() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
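+ // nothing to remove if the object map indicates the object cannot exist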
+ if (!m_image_ctx.object_map.object_may_exist(m_object_no)) {
+ return 1;
+ }
+
+ string oid = m_image_ctx.get_object_name(m_object_no);
+ ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
+
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
+ int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
+ assert(r == 0);
+ rados_completion->release();
+ return 0;
+ }
+
+private:
+ uint64_t m_object_no;
+};
+
+TrimRequest::TrimRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx)
+ : AsyncRequest(image_ctx, on_finish), m_new_size(new_size),
+ m_prog_ctx(prog_ctx)
+{
+ uint64_t period = m_image_ctx.get_stripe_period();
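+ // whole stripe periods beyond the new size can be deleted outright; the
+ // trailing partial period is handled later by the clean boundary step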
+ uint64_t new_num_periods = ((m_new_size + period - 1) / period);
+ m_delete_off = MIN(new_num_periods * period, original_size);
+ // first whole object that can be deleted outright
+ m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
+ m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 10) << this << " trim image " << original_size << " -> "
+ << m_new_size << " periods " << new_num_periods
+ << " discard to offset " << m_delete_off
+ << " delete objects " << m_delete_start
+ << " to " << m_num_objects << dendl;
+}
+
+
+bool TrimRequest::should_complete(int r)
+{
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
+ if (r < 0) {
+ lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
+ return true;
+ }
+
+ RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+ switch (m_state) {
+ case STATE_COPYUP_OBJECTS:
+ ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
+ send_pre_remove();
+ break;
+
+ case STATE_PRE_REMOVE:
+ ldout(cct, 5) << " PRE_REMOVE" << dendl;
+ send_remove_objects();
+ break;
+
+ case STATE_REMOVE_OBJECTS:
+ ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
+ send_post_remove();
+ break;
+
+ case STATE_POST_REMOVE:
+ ldout(cct, 5) << " POST_REMOVE" << dendl;
+ send_clean_boundary();
+ break;
+
+ case STATE_CLEAN_BOUNDARY:
+ ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
+ finish(0);
+ break;
+
+ case STATE_FINISHED:
+ ldout(cct, 5) << "FINISHED" << dendl;
+ return true;
+
+ default:
+ lderr(cct) << "invalid state: " << m_state << dendl;
+ assert(false);
+ break;
+ }
+ return false;
+}
+
+void TrimRequest::send() {
+ send_copyup_objects();
+}
+
+void TrimRequest::send_copyup_objects() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+
+ if (m_delete_start >= m_num_objects) {
+ send_clean_boundary();
+ return;
+ }
+
+ ::SnapContext snapc;
+ bool has_snapshots;
+ uint64_t parent_overlap;
+ {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+
+ snapc = m_image_ctx.snapc;
+ has_snapshots = !m_image_ctx.snaps.empty();
+ int r = m_image_ctx.get_parent_overlap(m_image_ctx.get_copyup_snap_id(),
+ &parent_overlap);
+ assert(r == 0);
+ }
+
+ // copyup is only required for the portion of the image that overlaps the parent
+ uint64_t copyup_end = Striper::get_num_objects(m_image_ctx.layout,
+ parent_overlap);
+ // TODO: protect against concurrent shrink and snap create?
+ if (copyup_end <= m_delete_start || !has_snapshots) {
+ send_pre_remove();
+ return;
+ }
+
+ uint64_t copyup_start = m_delete_start;
+ m_delete_start = copyup_end;
+
+ ldout(m_image_ctx.cct, 5) << this << " send_copyup_objects: "
+ << " start object=" << copyup_start << ", "
+ << " end object=" << copyup_end << dendl;
+ m_state = STATE_COPYUP_OBJECTS;
+
+ Context *ctx = create_callback_context();
+ AsyncObjectThrottle<>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject>(),
+ boost::lambda::_1, &m_image_ctx, snapc, boost::lambda::_2));
+ AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+ this, m_image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
+ copyup_end);
+ throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+void TrimRequest::send_remove_objects() {
+ assert(m_image_ctx.owner_lock.is_locked());
+
+ ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_REMOVE_OBJECTS;
+
+ Context *ctx = create_callback_context();
+ AsyncObjectThrottle<>::ContextFactory context_factory(
+ boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject>(),
+ boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
+ AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+ this, m_image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start,
+ m_num_objects);
+ throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+void TrimRequest::send_pre_remove() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ if (m_delete_start >= m_num_objects) {
+ send_clean_boundary();
+ return;
+ }
+
+ bool remove_objects = false;
+ {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ if (!m_image_ctx.object_map.enabled()) {
+ remove_objects = true;
+ } else {
+ ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_PRE_REMOVE;
+
+ assert(m_image_ctx.image_watcher->is_lock_owner());
+
+ // flag the objects as pending deletion
+ Context *ctx = create_callback_context();
+ RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+ if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
+ OBJECT_PENDING, OBJECT_EXISTS,
+ ctx)) {
+ delete ctx;
+ remove_objects = true;
+ }
+ }
+ }
+
+ // avoid possible recursive lock attempts
+ if (remove_objects) {
+ // no object map update required
+ send_remove_objects();
+ }
+}
+
+void TrimRequest::send_post_remove() {
+ assert(m_image_ctx.owner_lock.is_locked());
+
+ bool clean_boundary = false;
+ {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ if (!m_image_ctx.object_map.enabled()) {
+ clean_boundary = true;
+ } else {
+ ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
+ << " delete_start=" << m_delete_start
+ << " num_objects=" << m_num_objects << dendl;
+ m_state = STATE_POST_REMOVE;
+
+ assert(m_image_ctx.image_watcher->is_lock_owner());
+
+ // flag the pending objects as removed
+ Context *ctx = create_callback_context();
+ RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+ if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
+ OBJECT_NONEXISTENT,
+ OBJECT_PENDING, ctx)) {
+ delete ctx;
+ clean_boundary = true;
+ }
+ }
+ }
+
+ // avoid possible recursive lock attempts
+ if (clean_boundary) {
+ // no object map update required
+ send_clean_boundary();
+ }
+}
+
+void TrimRequest::send_clean_boundary() {
+ assert(m_image_ctx.owner_lock.is_locked());
+ CephContext *cct = m_image_ctx.cct;
+ if (m_delete_off <= m_new_size) {
+ finish(0);
+ return;
+ }
+
+ // should have been canceled prior to releasing lock
+ assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+ m_image_ctx.image_watcher->is_lock_owner());
+ uint64_t delete_len = m_delete_off - m_new_size;
+ ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
+ << " delete_off=" << m_delete_off
+ << " length=" << delete_len << dendl;
+ m_state = STATE_CLEAN_BOUNDARY;
+
+ ::SnapContext snapc;
+ {
+ RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+ snapc = m_image_ctx.snapc;
+ }
+
+ // discard the partial boundary beyond the new size
+ std::vector<ObjectExtent> extents;
+ Striper::file_to_extents(cct, m_image_ctx.format_string,
+ &m_image_ctx.layout, m_new_size, delete_len, 0,
+ extents);
+
+ ContextCompletion *completion =
+ new ContextCompletion(create_callback_context(), true);
+ for (vector<ObjectExtent>::iterator p = extents.begin();
+ p != extents.end(); ++p) {
+ ldout(cct, 20) << " ex " << *p << dendl;
+ Context *req_comp = new C_ContextCompletion(*completion);
+
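+ // an extent starting at object offset 0 removes the whole object (trim);
+ // otherwise the object is truncated down to the new boundary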
+ AioObjectRequest *req;
+ if (p->offset == 0) {
+ req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc,
+ req_comp);
+ } else {
+ req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno,
+ p->offset, snapc, req_comp);
+ }
+ req->send();
+ }
+ completion->finish_adding_requests();
+}
+
+void TrimRequest::finish(int r) {
+ m_state = STATE_FINISHED;
+ async_complete(r);
+}
+
+} // namespace operation
+} // namespace librbd
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+class TrimRequest : public AsyncRequest<>
+{
+public:
+ TrimRequest(ImageCtx &image_ctx, Context *on_finish,
+ uint64_t original_size, uint64_t new_size,
+ ProgressContext &prog_ctx);
+
+ virtual void send();
+
+protected:
+ /**
+ * Trim goes through the following state machine to remove whole objects,
+ * clean partially trimmed objects, and update the object map:
+ *
+ * @verbatim
+ *
+ * <start> . . . . > STATE_FINISHED . . . . . . . . .
+ * | . .
+ * | . . . . . . . . . . . . .
+ * | . .
+ * v . .
+ * STATE_COPYUP_OBJECTS . . . . .
+ * | . . .
+ * | . . .
+ * v v v .
+ * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS .
+ * | . . .
+ * /-----------------------/ . . . . . . . .
+ * | . . .
+ * v v v v
+ * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
+ * . ^
+ * . .
+ * . . . . . . . . . . . . . . . . . . . . . . .
+ *
+ * @endverbatim
+ *
+ * The _COPYUP_OBJECTS state is skipped if there is no parent overlap
+ * within the new image size or the image does not have any snapshots.
+ * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+ * isn't enabled. The _REMOVE_OBJECTS state is skipped if there are no
+ * whole objects to remove. The _CLEAN_BOUNDARY state is skipped if the
+ * new size falls on a stripe-period boundary, leaving no partial
+ * objects to trim. The state machine will immediately transition to
+ * _FINISHED state if there are no bytes to trim.
+ */
+
+ enum State {
+ STATE_COPYUP_OBJECTS,
+ STATE_PRE_REMOVE,
+ STATE_REMOVE_OBJECTS,
+ STATE_POST_REMOVE,
+ STATE_CLEAN_BOUNDARY,
+ STATE_FINISHED
+ };
+
+ virtual bool should_complete(int r);
+
+ State m_state;
+
+private:
+ uint64_t m_delete_start;
+ uint64_t m_num_objects;
+ uint64_t m_delete_off;
+ uint64_t m_new_size;
+ ProgressContext &m_prog_ctx;
+
+ void send_copyup_objects();
+ void send_remove_objects();
+ void send_pre_remove();
+ void send_post_remove();
+ void send_clean_boundary();
+ void finish(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H