git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: move all image operation state machines to new namespace
author: Jason Dillaman <dillaman@redhat.com>
Wed, 26 Aug 2015 18:57:16 +0000 (14:57 -0400)
committer: Jason Dillaman <dillaman@redhat.com>
Tue, 17 Nov 2015 20:10:14 +0000 (15:10 -0500)
Prior to creating multiple new state machines for image operations,
move all existing op requests to a new namespace and folder.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
20 files changed:
src/librbd/FlattenRequest.cc [deleted file]
src/librbd/FlattenRequest.h [deleted file]
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/Makefile.am
src/librbd/RebuildObjectMapRequest.cc [deleted file]
src/librbd/RebuildObjectMapRequest.h [deleted file]
src/librbd/ResizeRequest.cc [deleted file]
src/librbd/ResizeRequest.h [deleted file]
src/librbd/TrimRequest.cc [deleted file]
src/librbd/TrimRequest.h [deleted file]
src/librbd/internal.cc
src/librbd/operation/FlattenRequest.cc [new file with mode: 0644]
src/librbd/operation/FlattenRequest.h [new file with mode: 0644]
src/librbd/operation/RebuildObjectMapRequest.cc [new file with mode: 0644]
src/librbd/operation/RebuildObjectMapRequest.h [new file with mode: 0644]
src/librbd/operation/ResizeRequest.cc [new file with mode: 0644]
src/librbd/operation/ResizeRequest.h [new file with mode: 0644]
src/librbd/operation/TrimRequest.cc [new file with mode: 0644]
src/librbd/operation/TrimRequest.h [new file with mode: 0644]

diff --git a/src/librbd/FlattenRequest.cc b/src/librbd/FlattenRequest.cc
deleted file mode 100644 (file)
index 5b3e4a2..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "librbd/FlattenRequest.h"
-#include "librbd/AioObjectRequest.h"
-#include "librbd/AsyncObjectThrottle.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/ObjectMap.h"
-#include "common/dout.h"
-#include "common/errno.h"
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::FlattenRequest: "
-
-namespace librbd {
-
-class C_FlattenObject : public C_AsyncObjectThrottle<> {
-public:
-  C_FlattenObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
-                  uint64_t object_size, ::SnapContext snapc, uint64_t object_no)
-    : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_size(object_size),
-      m_snapc(snapc), m_object_no(object_no)
-  {
-  }
-
-  virtual int send() {
-    assert(m_image_ctx.owner_lock.is_locked());
-    CephContext *cct = m_image_ctx.cct;
-
-    if (m_image_ctx.image_watcher->is_lock_supported() &&
-        !m_image_ctx.image_watcher->is_lock_owner()) {
-      ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
-      return -ERESTART;
-    }
-
-    bufferlist bl;
-    string oid = m_image_ctx.get_object_name(m_object_no);
-    AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, oid, m_object_no, 0,
-                                             bl, m_snapc, this);
-    if (!req->has_parent()) {
-      // stop early if the parent went away - it just means
-      // another flatten finished first or the image was resized
-      delete req;
-      return 1;
-    }
-
-    req->send();
-    return 0;
-  }
-
-private:
-  uint64_t m_object_size;
-  ::SnapContext m_snapc;
-  uint64_t m_object_no;
-};
-
-bool FlattenRequest::should_complete(int r) {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
-  if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) {
-    lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
-    return true;
-  }
-
-  RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-  switch (m_state) {
-  case STATE_FLATTEN_OBJECTS:
-    ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl;
-    return send_update_header();
-
-  case STATE_UPDATE_HEADER:
-    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
-    return send_update_children();
-
-  case STATE_UPDATE_CHILDREN:
-    ldout(cct, 5) << "UPDATE_CHILDREN" << dendl;
-    return true;
-
-  default:
-    lderr(cct) << "invalid state: " << m_state << dendl;
-    assert(false);
-    break;
-  }
-  return false;
-}
-
-void FlattenRequest::send() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 5) << this << " send" << dendl;
-
-  m_state = STATE_FLATTEN_OBJECTS;
-  AsyncObjectThrottle<>::ContextFactory context_factory(
-    boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject>(),
-      boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc,
-      boost::lambda::_2));
-  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
-    this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
-    0, m_overlap_objects);
-  throttle->start_ops(m_image_ctx.concurrent_management_ops);
-}
-
-bool FlattenRequest::send_update_header() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-
-  ldout(cct, 5) << this << " send_update_header" << dendl;
-  m_state = STATE_UPDATE_HEADER;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  {
-    RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
-    // stop early if the parent went away - it just means
-    // another flatten finished first, so this one is useless.
-    if (!m_image_ctx.parent) {
-      ldout(cct, 5) << "image already flattened" << dendl;
-      return true;
-    }
-    m_parent_spec = m_image_ctx.parent_md.spec;
-  }
-  m_ignore_enoent = true;
-
-  // remove parent from this (base) image
-  librados::ObjectWriteOperation op;
-  if (m_image_ctx.image_watcher->is_lock_supported()) {
-    m_image_ctx.image_watcher->assert_header_locked(&op);
-  }
-  cls_client::remove_parent(&op);
-
-  librados::AioCompletion *rados_completion = create_callback_completion();
-  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
-                                        rados_completion, &op);
-  assert(r == 0);
-  rados_completion->release();
-  return false;
-}
-
-bool FlattenRequest::send_update_children() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  // if there are no snaps, remove from the children object as well
-  // (if snapshots remain, they have their own parent info, and the child
-  // will be removed when the last snap goes away)
-  RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-  if ((m_image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
-      !m_image_ctx.snaps.empty()) {
-    return true;
-  }
-
-  ldout(cct, 2) << "removing child from children list..." << dendl;
-  m_state = STATE_UPDATE_CHILDREN;
-
-  librados::ObjectWriteOperation op;
-  cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id);
-
-  librados::AioCompletion *rados_completion = create_callback_completion();
-  int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion,
-                                    &op);
-  assert(r == 0);
-  rados_completion->release();
-  return false;
-}
-
-} // namespace librbd
diff --git a/src/librbd/FlattenRequest.h b/src/librbd/FlattenRequest.h
deleted file mode 100644 (file)
index 02155f2..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_FLATTEN_REQUEST_H
-#define CEPH_LIBRBD_FLATTEN_REQUEST_H
-
-#include "librbd/AsyncRequest.h"
-#include "librbd/parent_types.h"
-#include "common/snap_types.h"
-
-namespace librbd {
-
-class ImageCtx;
-class ProgressContext;
-
-class FlattenRequest : public AsyncRequest<>
-{
-public:
-  FlattenRequest(ImageCtx &image_ctx, Context *on_finish,
-                     uint64_t object_size, uint64_t overlap_objects,
-                     const ::SnapContext &snapc, ProgressContext &prog_ctx)
-    : AsyncRequest(image_ctx, on_finish), m_object_size(object_size),
-      m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx),
-      m_ignore_enoent(false)
-  {
-  }
-
-  virtual void send();
-
-protected:
-  virtual bool should_complete(int r);
-
-private:
-  /**
-   * Flatten goes through the following state machine to copyup objects
-   * from the parent image:
-   *
-   * @verbatim
-   *
-   * <start>
-   *    |
-   *    v
-   * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . .
-   *           .                         |                  .
-   *           .                         |                  .
-   *           .                         v                  .
-   *           .               STATE_UPDATE_CHILDREN        .
-   *           .                         |                  .
-   *           .                         |                  .
-   *           .                         \---> <finish> < . .
-   *           .                                   ^
-   *           .                                   .
-   *           . . . . . . . . . . . . . . . . . . .
-   *
-   * @endverbatim
-   *
-   * The _UPDATE_CHILDREN state will be skipped if the image has one or
-   * more snapshots. The _UPDATE_HEADER state will be skipped if the
-   * image was concurrently flattened by another client.
-   */
-  enum State {
-    STATE_FLATTEN_OBJECTS,
-    STATE_UPDATE_HEADER,
-    STATE_UPDATE_CHILDREN
-  };
-
-  uint64_t m_object_size;
-  uint64_t m_overlap_objects;
-  ::SnapContext m_snapc;
-  ProgressContext &m_prog_ctx;
-  State m_state;
-
-  parent_spec m_parent_spec;
-  bool m_ignore_enoent;
-
-  bool send_update_header();
-  bool send_update_children();
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_FLATTEN_REQUEST_H
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index d19c7c4515f9e3cc84aa793cab699de9baea9038..2f0bb39c8e1b751cde183d96c518e12def305791 100644 (file)
@@ -19,7 +19,7 @@
 #include "librbd/Journal.h"
 #include "librbd/LibrbdAdminSocketHook.h"
 #include "librbd/ObjectMap.h"
-#include "librbd/ResizeRequest.h"
+#include "librbd/operation/ResizeRequest.h"
 
 #include <boost/bind.hpp>
 
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 15b476d42b710288bf00d027d1139e23a2e5e81c..d7010e3b91363ff87781871f1560f098550315fd 100644 (file)
@@ -44,7 +44,10 @@ namespace librbd {
   class LibrbdAdminSocketHook;
   class ImageWatcher;
   class Journal;
+
+  namespace operation {
   class ResizeRequest;
+  }
 
   struct ImageCtx {
     CephContext *cct;
@@ -133,7 +136,7 @@ namespace librbd {
 
     atomic_t async_request_seq;
 
-    xlist<ResizeRequest*> resize_reqs;
+    xlist<operation::ResizeRequest*> resize_reqs;
 
     AioImageRequestWQ *aio_work_queue;
     ContextWQ *op_work_queue;
diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am
index dab84c61b2f1c805521c870af8f2faeb4d7cab82..c8d840e906067ec9048a66a65662351144a44cad 100644 (file)
@@ -20,15 +20,15 @@ librbd_internal_la_SOURCES = \
        librbd/ImageCtx.cc \
        librbd/ImageWatcher.cc \
        librbd/internal.cc \
-       librbd/FlattenRequest.cc \
        librbd/Journal.cc \
        librbd/JournalReplay.cc \
        librbd/LibrbdAdminSocketHook.cc \
        librbd/LibrbdWriteback.cc \
        librbd/ObjectMap.cc \
-       librbd/RebuildObjectMapRequest.cc \
-       librbd/ResizeRequest.cc \
-       librbd/TrimRequest.cc
+       librbd/operation/FlattenRequest.cc \
+       librbd/operation/RebuildObjectMapRequest.cc \
+       librbd/operation/ResizeRequest.cc \
+       librbd/operation/TrimRequest.cc
 noinst_LTLIBRARIES += librbd_internal.la
 
 librbd_api_la_SOURCES = \
@@ -66,7 +66,6 @@ noinst_HEADERS += \
        librbd/ImageCtx.h \
        librbd/ImageWatcher.h \
        librbd/internal.h \
-       librbd/FlattenRequest.h \
        librbd/Journal.h \
        librbd/JournalReplay.h \
        librbd/JournalTypes.h \
@@ -74,12 +73,13 @@ noinst_HEADERS += \
        librbd/LibrbdWriteback.h \
        librbd/ObjectMap.h \
        librbd/parent_types.h \
-       librbd/RebuildObjectMapRequest.h \
-       librbd/ResizeRequest.h \
        librbd/SnapInfo.h \
        librbd/TaskFinisher.h \
-       librbd/TrimRequest.h \
-       librbd/WatchNotifyTypes.h
+       librbd/WatchNotifyTypes.h \
+       librbd/operation/FlattenRequest.h \
+       librbd/operation/RebuildObjectMapRequest.h \
+       librbd/operation/ResizeRequest.h \
+       librbd/operation/TrimRequest.h
 
 endif # WITH_RBD
 endif # WITH_RADOS
diff --git a/src/librbd/RebuildObjectMapRequest.cc b/src/librbd/RebuildObjectMapRequest.cc
deleted file mode 100644 (file)
index 5d5a8f0..0000000
+++ /dev/null
@@ -1,355 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "librbd/RebuildObjectMapRequest.h"
-#include "common/dout.h"
-#include "common/errno.h"
-#include "librbd/AsyncObjectThrottle.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
-#include "librbd/ResizeRequest.h"
-#include "librbd/TrimRequest.h"
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: "
-
-namespace librbd {
-
-namespace {
-
-class C_VerifyObject : public C_AsyncObjectThrottle<> {
-public:
-  C_VerifyObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
-                 uint64_t snap_id, uint64_t object_no)
-    : C_AsyncObjectThrottle(throttle, *image_ctx), m_snap_id(snap_id),
-      m_object_no(object_no), m_oid(m_image_ctx.get_object_name(m_object_no))
-  {
-    m_io_ctx.dup(m_image_ctx.md_ctx);
-    m_io_ctx.snap_set_read(CEPH_SNAPDIR);
-  }
-
-  virtual void complete(int r) {
-    if (should_complete(r)) {
-      ldout(m_image_ctx.cct, 20) << m_oid << " C_VerifyObject completed "
-                                 << dendl;
-      finish(r);
-      delete this;
-    }
-  }
-
-  virtual int send() {
-    send_list_snaps();
-    return 0;
-  }
-
-private:
-  librados::IoCtx m_io_ctx;
-  uint64_t m_snap_id;
-  uint64_t m_object_no;
-  std::string m_oid;
-
-  librados::snap_set_t m_snap_set;
-  int m_snap_list_ret;
-
-  bool should_complete(int r) {
-    CephContext *cct = m_image_ctx.cct;
-    if (r == 0) {
-      r = m_snap_list_ret;
-    }
-    if (r < 0 && r != -ENOENT) {
-      lderr(cct) << m_oid << " C_VerifyObject::should_complete: "
-                 << "encountered an error: " << cpp_strerror(r) << dendl;
-      return true;
-    }
-
-    ldout(cct, 20) << m_oid << " C_VerifyObject::should_complete: " << " r="
-                   << r << dendl;
-    return update_object_map(get_object_state());
-  }
-
-  void send_list_snaps() {
-    assert(m_image_ctx.owner_lock.is_locked());
-    ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::send_list_snaps"
-                              << dendl;
-
-    librados::AioCompletion *comp = librados::Rados::aio_create_completion(
-      this, NULL, rados_ctx_cb);
-
-    librados::ObjectReadOperation op;
-    op.list_snaps(&m_snap_set, &m_snap_list_ret);
-
-    int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL);
-    assert(r == 0);
-    comp->release();
-  }
-
-  uint8_t get_object_state() {
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    for (std::vector<librados::clone_info_t>::const_iterator r =
-           m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) {
-      librados::snap_t from_snap_id;
-      librados::snap_t to_snap_id;
-      if (r->cloneid == librados::SNAP_HEAD) {
-        from_snap_id = next_valid_snap_id(m_snap_set.seq + 1);
-        to_snap_id = librados::SNAP_HEAD;
-      } else {
-        from_snap_id = next_valid_snap_id(r->snaps[0]);
-        to_snap_id = r->snaps[r->snaps.size()-1];
-      }
-
-      if (to_snap_id < m_snap_id) {
-        continue;
-      } else if (m_snap_id < from_snap_id) {
-        break;
-      }
-
-      if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 &&
-          from_snap_id != m_snap_id) {
-        return OBJECT_EXISTS_CLEAN;
-      }
-      return OBJECT_EXISTS;
-    }
-    return OBJECT_NONEXISTENT;
-  }
-
-  uint64_t next_valid_snap_id(uint64_t snap_id) {
-    assert(m_image_ctx.snap_lock.is_locked());
-
-    std::map<librados::snap_t, SnapInfo>::iterator it =
-      m_image_ctx.snap_info.lower_bound(snap_id);
-    if (it == m_image_ctx.snap_info.end()) {
-      return CEPH_NOSNAP;
-    }
-    return it->first;
-  }
-
-  bool update_object_map(uint8_t new_state) {
-    RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-    CephContext *cct = m_image_ctx.cct;
-
-    // should have been canceled prior to releasing lock
-    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-           m_image_ctx.image_watcher->is_lock_owner());
-
-    RWLock::WLocker l(m_image_ctx.object_map_lock);
-    uint8_t state = m_image_ctx.object_map[m_object_no];
-    if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT &&
-        m_snap_id == CEPH_NOSNAP) {
-      // might be writing object to OSD concurrently
-      new_state = state;
-    }
-
-    if (new_state != state) {
-      ldout(cct, 15) << m_oid << " C_VerifyObject::update_object_map "
-                     << static_cast<uint32_t>(state) << "->"
-                     << static_cast<uint32_t>(new_state) << dendl;
-      m_image_ctx.object_map[m_object_no] = new_state;
-    }
-    return true;
-  }
-};
-
-} // anonymous namespace
-
-
-void RebuildObjectMapRequest::send() {
-  send_resize_object_map();
-}
-
-bool RebuildObjectMapRequest::should_complete(int r) {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
-
-  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
-  switch (m_state) {
-  case STATE_RESIZE_OBJECT_MAP:
-    ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl;
-    if (r == -ESTALE && !m_attempted_trim) {
-      // objects are still flagged as in-use -- delete them
-      m_attempted_trim = true;
-      send_trim_image();
-      return false;
-    } else if (r == 0) {
-      send_verify_objects();
-    }
-    break;
-
-  case STATE_TRIM_IMAGE:
-    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
-    if (r == 0) {
-      send_resize_object_map();
-    }
-    break;
-
-  case STATE_VERIFY_OBJECTS:
-    ldout(cct, 5) << "VERIFY_OBJECTS" << dendl;
-    if (r == 0) {
-      send_save_object_map();
-    }
-    break;
-
-  case STATE_SAVE_OBJECT_MAP:
-    ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl;
-    if (r == 0) {
-      send_update_header();
-    }
-    break;
-  case STATE_UPDATE_HEADER:
-    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
-    if (r == 0) {
-      return true;
-    }
-    break;
-
-  default:
-    assert(false);
-    break;
-  }
-
-  if (r < 0) {
-    lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r)
-               << dendl;
-    return true;
-  }
-  return false;
-}
-
-void RebuildObjectMapRequest::send_resize_object_map() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-
-  uint64_t num_objects;
-  uint64_t size;
-  {
-    RWLock::RLocker l(m_image_ctx.snap_lock);
-    size = get_image_size();
-    num_objects = Striper::get_num_objects(m_image_ctx.layout, size);
-  }
-
-  if (m_image_ctx.object_map.size() == num_objects) {
-    send_verify_objects();
-    return;
-  }
-
-  ldout(cct, 5) << this << " send_resize_object_map" << dendl;
-  m_state = STATE_RESIZE_OBJECT_MAP;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-  m_image_ctx.object_map.aio_resize(size, OBJECT_NONEXISTENT,
-                                    create_callback_context());
-}
-
-void RebuildObjectMapRequest::send_trim_image() {
-  CephContext *cct = m_image_ctx.cct;
-
-  RWLock::RLocker l(m_image_ctx.owner_lock);
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-  ldout(cct, 5) << this << " send_trim_image" << dendl;
-  m_state = STATE_TRIM_IMAGE;
-
-  uint64_t new_size;
-  uint64_t orig_size;
-  {
-    RWLock::RLocker l(m_image_ctx.snap_lock);
-    new_size = get_image_size();
-    orig_size = m_image_ctx.get_object_size() *
-                m_image_ctx.object_map.size();
-  }
-  TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
-                                     orig_size, new_size, m_prog_ctx);
-  req->send();
-}
-
-void RebuildObjectMapRequest::send_verify_objects() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-
-  uint64_t snap_id;
-  uint64_t num_objects;
-  {
-    RWLock::RLocker l(m_image_ctx.snap_lock);
-    snap_id = m_image_ctx.snap_id;
-    num_objects = Striper::get_num_objects(m_image_ctx.layout,
-                                           m_image_ctx.get_image_size(snap_id));
-  }
-
-  if (num_objects == 0) {
-    send_save_object_map();
-    return;
-  }
-
-  m_state = STATE_VERIFY_OBJECTS;
-  ldout(cct, 5) << this << " send_verify_objects" << dendl;
-
-  AsyncObjectThrottle<>::ContextFactory context_factory(
-    boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObject>(),
-      boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2));
-  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
-    this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
-    0, num_objects);
-  throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
-}
-
-void RebuildObjectMapRequest::send_save_object_map() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-
-  ldout(cct, 5) << this << " send_save_object_map" << dendl;
-  m_state = STATE_SAVE_OBJECT_MAP;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-  m_image_ctx.object_map.aio_save(create_callback_context());
-}
-
-void RebuildObjectMapRequest::send_update_header() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl;
-  m_state = STATE_UPDATE_HEADER;
-
-  librados::ObjectWriteOperation op;
-  if (m_image_ctx.image_watcher->is_lock_supported()) {
-    m_image_ctx.image_watcher->assert_header_locked(&op);
-  }
-
-  uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID;
-  cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags);
-
-  librados::AioCompletion *comp = create_callback_completion();
-  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
-  assert(r == 0);
-  comp->release();
-
-  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
-  m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false);
-}
-
-uint64_t RebuildObjectMapRequest::get_image_size() const {
-  assert(m_image_ctx.snap_lock.is_locked());
-  if (m_image_ctx.snap_id == CEPH_NOSNAP) {
-    if (!m_image_ctx.resize_reqs.empty()) {
-      return m_image_ctx.resize_reqs.front()->get_image_size();
-    } else {
-      return m_image_ctx.size;
-    }
-  }
-  return  m_image_ctx.get_image_size(m_image_ctx.snap_id);
-}
-
-} // namespace librbd
diff --git a/src/librbd/RebuildObjectMapRequest.h b/src/librbd/RebuildObjectMapRequest.h
deleted file mode 100644 (file)
index 02a41ef..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H
-#define CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H
-
-#include "include/int_types.h"
-#include "librbd/AsyncRequest.h"
-
-namespace librbd {
-
-class ImageCtx;
-class ProgressContext;
-
-class RebuildObjectMapRequest : public AsyncRequest<> {
-public:
-
-  RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish,
-                          ProgressContext &prog_ctx)
-    : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx),
-      m_prog_ctx(prog_ctx), m_attempted_trim(false)
-  {
-  }
-
-  virtual void send();
-
-protected:
-  virtual bool should_complete(int r);
-
-private:
-  /**
-   * Rebuild object map goes through the following state machine to
-   * verify per-object state:
-   *
-   * <start>
-   *  .   |               . . . . . . . . . .
-   *  .   |               .                 .
-   *  .   v               v                 .
-   *  . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE
-   *  .          |
-   *  .          v
-   *  . . . > STATE_VERIFY_OBJECTS
-   *             |
-   *             v
-   *          STATE_SAVE_OBJECT_MAP
-   *             |
-   *             v
-   *          STATE_UPDATE_HEADER
-   *
-   * The _RESIZE_OBJECT_MAP state will be skipped if the object map
-   * is appropriately sized for the image. The _TRIM_IMAGE state will
-   * only be hit if the resize failed due to an in-use object.
-   */
-  enum State {
-    STATE_RESIZE_OBJECT_MAP,
-    STATE_TRIM_IMAGE,
-    STATE_VERIFY_OBJECTS,
-    STATE_SAVE_OBJECT_MAP,
-    STATE_UPDATE_HEADER
-  };
-
-  ImageCtx &m_image_ctx;
-  ProgressContext &m_prog_ctx;
-  State m_state;
-  bool m_attempted_trim;
-
-  void send_resize_object_map();
-  void send_trim_image();
-  void send_verify_objects();
-  void send_save_object_map();
-  void send_update_header();
-
-  uint64_t get_image_size() const;
-
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_REBUILD_OBJECT_MAP_REQUEST_H
diff --git a/src/librbd/ResizeRequest.cc b/src/librbd/ResizeRequest.cc
deleted file mode 100644 (file)
index 186e730..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#include "librbd/ResizeRequest.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
-#include "librbd/TrimRequest.h"
-#include "common/dout.h"
-#include "common/errno.h"
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::ResizeRequest: "
-
-namespace librbd
-{
-
-ResizeRequest::ResizeRequest(ImageCtx &image_ctx, Context *on_finish,
-                                       uint64_t new_size,
-                                       ProgressContext &prog_ctx)
-  : AsyncRequest(image_ctx, on_finish),
-    m_original_size(0), m_new_size(new_size),
-    m_prog_ctx(prog_ctx), m_new_parent_overlap(0),
-    m_xlist_item(this)
-{
-}
-
-ResizeRequest::~ResizeRequest() {
-  ResizeRequest *next_req = NULL;
-  {
-    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
-    assert(m_xlist_item.remove_myself());
-    if (!m_image_ctx.resize_reqs.empty()) {
-      next_req = m_image_ctx.resize_reqs.front();
-    }
-  }
-
-  if (next_req != NULL) {
-    RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-    next_req->send();
-  }
-}
-
-bool ResizeRequest::should_complete(int r) {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
-
-  if (r < 0) {
-    lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
-    return true;
-  }
-  if (m_state == STATE_FINISHED) {
-    ldout(cct, 5) << "FINISHED" << dendl;
-    return true;
-  }
-
-  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
-  switch (m_state) {
-  case STATE_FLUSH:
-    ldout(cct, 5) << "FLUSH" << dendl;
-    send_invalidate_cache();
-    break;
-
-  case STATE_INVALIDATE_CACHE:
-    ldout(cct, 5) << "INVALIDATE_CACHE" << dendl;
-    send_trim_image();
-    break;
-
-  case STATE_TRIM_IMAGE:
-    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
-    send_update_header();
-    break;
-
-  case STATE_GROW_OBJECT_MAP:
-    ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
-    send_update_header();
-    break;
-
-  case STATE_UPDATE_HEADER:
-    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
-    if (send_shrink_object_map()) {
-      update_size_and_overlap();
-      return true;
-    }
-    break;
-
-  case STATE_SHRINK_OBJECT_MAP:
-    ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
-    update_size_and_overlap();
-    return true;
-
-  default:
-    lderr(cct) << "invalid state: " << m_state << dendl;
-    assert(false);
-    break;
-  }
-  return false;
-}
-
-void ResizeRequest::send() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  {
-    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
-    if (!m_xlist_item.is_on_list()) {
-      m_image_ctx.resize_reqs.push_back(&m_xlist_item);
-      if (m_image_ctx.resize_reqs.front() != this) {
-        return;
-      }
-    }
-
-    assert(m_image_ctx.resize_reqs.front() == this);
-    m_original_size = m_image_ctx.size;
-    compute_parent_overlap();
-  }
-
-  CephContext *cct = m_image_ctx.cct;
-  if (is_canceled()) {
-    complete(-ERESTART);
-  } else if (m_original_size == m_new_size) {
-    ldout(cct, 2) << this << " no change in size (" << m_original_size
-                 << " -> " << m_new_size << ")" << dendl;
-    m_state = STATE_FINISHED;
-    complete(0);
-  } else if (m_new_size > m_original_size) {
-    ldout(cct, 2) << this << " expanding image (" << m_original_size
-                 << " -> " << m_new_size << ")" << dendl;
-    send_grow_object_map();
-  } else {
-    ldout(cct, 2) << this << " shrinking image (" << m_original_size
-                 << " -> " << m_new_size << ")" << dendl;
-    send_flush();
-  }
-}
-
-void ResizeRequest::send_flush() {
-  ldout(m_image_ctx.cct, 5) << this << " send_flush: "
-                            << " original_size=" << m_original_size
-                            << " new_size=" << m_new_size << dendl;
-  m_state = STATE_FLUSH;
-
-  // with clipping adjusted, ensure that write / copy-on-read operations won't
-  // (re-)create objects that we just removed. need async callback to ensure
-  // we don't have cache_lock already held
-  m_image_ctx.flush_async_operations(create_async_callback_context());
-}
-
-void ResizeRequest::send_invalidate_cache() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  ldout(m_image_ctx.cct, 5) << this << " send_invalidate_cache: "
-                            << " original_size=" << m_original_size
-                            << " new_size=" << m_new_size << dendl;
-  m_state = STATE_INVALIDATE_CACHE;
-
-  // need to invalidate since we're deleting objects, and
-  // ObjectCacher doesn't track non-existent objects
-  m_image_ctx.invalidate_cache(create_callback_context());
-}
-
-void ResizeRequest::send_trim_image() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
-                            << " original_size=" << m_original_size
-                            << " new_size=" << m_new_size << dendl;
-  m_state = STATE_TRIM_IMAGE;
-
-  TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
-                                    m_original_size, m_new_size, m_prog_ctx);
-  req->send();
-}
-
-void ResizeRequest::send_grow_object_map() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  if (!m_image_ctx.object_map.enabled()) {
-    send_update_header();
-    return;
-  }
-
-  ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
-                            << " original_size=" << m_original_size
-                            << " new_size=" << m_new_size << dendl;
-  m_state = STATE_GROW_OBJECT_MAP;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
-                                   create_callback_context());
-}
-
-bool ResizeRequest::send_shrink_object_map() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  if (!m_image_ctx.object_map.enabled() || m_new_size > m_original_size) {
-    return true;
-  }
-
-  ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
-                           << " original_size=" << m_original_size
-                           << " new_size=" << m_new_size << dendl;
-  m_state = STATE_SHRINK_OBJECT_MAP;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
-                                   create_callback_context());
-  return false;
-}
-
-void ResizeRequest::send_update_header() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
-                            << " original_size=" << m_original_size
-                            << " new_size=" << m_new_size << dendl;
-  m_state = STATE_UPDATE_HEADER;
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  librados::ObjectWriteOperation op;
-  if (m_image_ctx.old_format) {
-    // rewrite only the size field of the header
-    // NOTE: format 1 image headers are not stored in fixed endian format
-    bufferlist bl;
-    bl.append(reinterpret_cast<const char*>(&m_new_size), sizeof(m_new_size));
-    op.write(offsetof(rbd_obj_header_ondisk, image_size), bl);
-  } else {
-    if (m_image_ctx.image_watcher->is_lock_supported()) {
-      m_image_ctx.image_watcher->assert_header_locked(&op);
-    }
-    cls_client::set_size(&op, m_new_size);
-  }
-
-  librados::AioCompletion *rados_completion = create_callback_completion();
-  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
-                                    rados_completion, &op);
-  assert(r == 0);
-  rados_completion->release();
-}
-
-void ResizeRequest::compute_parent_overlap() {
-  RWLock::RLocker l2(m_image_ctx.parent_lock);
-  if (m_image_ctx.parent == NULL) {
-    m_new_parent_overlap = 0;
-  } else {
-    m_new_parent_overlap = MIN(m_new_size, m_image_ctx.parent_md.overlap);
-  }
-}
-
-void ResizeRequest::update_size_and_overlap() {
-  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
-  m_image_ctx.size = m_new_size;
-
-  RWLock::WLocker parent_locker(m_image_ctx.parent_lock);
-  if (m_image_ctx.parent != NULL && m_new_size < m_original_size) {
-    m_image_ctx.parent_md.overlap = m_new_parent_overlap;
-  }
-}
-
-} // namespace librbd
diff --git a/src/librbd/ResizeRequest.h b/src/librbd/ResizeRequest.h
deleted file mode 100644 (file)
index 8a04478..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_RESIZE_REQUEST_H
-#define CEPH_LIBRBD_RESIZE_REQUEST_H
-
-#include "librbd/AsyncRequest.h"
-#include "include/xlist.h"
-
-namespace librbd
-{
-
-class ImageCtx;
-class ProgressContext;
-
-// Asynchronous request that resizes an image to a new size, driving the
-// state machine documented below.
-class ResizeRequest : public AsyncRequest<>
-{
-public:
-  ResizeRequest(ImageCtx &image_ctx, Context *on_finish, uint64_t new_size,
-                     ProgressContext &prog_ctx);
-  virtual ~ResizeRequest();
-
-  virtual void send();
-
-  // true when the requested size is smaller than the original size
-  inline bool shrinking() const {
-    return m_new_size < m_original_size;
-  }
-
-  // returns the requested (new) size, not the size currently in effect
-  inline uint64_t get_image_size() const {
-    return m_new_size;
-  }
-
-private:
-  /**
-   * Resize goes through the following state machine to resize the image
-   * and update the object map:
-   *
-   * @verbatim
-   *
-   * <start> -------------> STATE_FINISHED -----------------------------\
-   *  |  .    (no change)                                               |
-   *  |  .                                                              |
-   *  |  . . . . . . . . . . . . . . . . . . . . .                      |
-   *  |                                          .                      |
-   *  |                                          v                      |
-   *  |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------|
-   *  | (grow)                                                          |
-   *  |                                                                 |
-   *  |                                                                 |
-   *  \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE    |
-   *    (shrink)                                 |                      |
-   *                                             |                      |
-   *                      /----------------------/                      |
-   *                      |                                             |
-   *                      v                                             |
-   *              STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . .  |
-   *                                             |                   .  |
-   *                                             |                   .  |
-   *                                             v                   v  v
-   *                                  STATE_SHRINK_OBJECT_MAP ---> <finish>
-   *
-   * @endverbatim
-   *
-   * The _OBJECT_MAP states are skipped if the object map isn't enabled.
-   * The state machine will immediately transition to _FINISHED if there
-   * are no objects to trim.
-   */
-  enum State {
-    STATE_FLUSH,
-    STATE_INVALIDATE_CACHE,
-    STATE_TRIM_IMAGE,
-    STATE_GROW_OBJECT_MAP,
-    STATE_UPDATE_HEADER,
-    STATE_SHRINK_OBJECT_MAP,
-    STATE_FINISHED
-  };
-
-  State m_state;                  // current position in the state machine
-  uint64_t m_original_size;       // size compared against to detect a shrink
-  uint64_t m_new_size;            // requested size
-  ProgressContext &m_prog_ctx;
-  uint64_t m_new_parent_overlap;  // set by compute_parent_overlap()
-
-  // NOTE(review): presumably links this request into a list of pending
-  // resize requests; the owning list is not visible here -- confirm
-  xlist<ResizeRequest *>::item m_xlist_item;
-
-  virtual bool should_complete(int r);
-
-  // one send_* helper per state of the state machine above
-  void send_flush();
-  void send_invalidate_cache();
-  void send_trim_image();
-  void send_grow_object_map();
-  bool send_shrink_object_map();
-  void send_update_header();
-
-  void compute_parent_overlap();
-  void update_size_and_overlap();
-
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_RESIZE_REQUEST_H
diff --git a/src/librbd/TrimRequest.cc b/src/librbd/TrimRequest.cc
deleted file mode 100644 (file)
index afef4d9..0000000
+++ /dev/null
@@ -1,356 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#include "librbd/TrimRequest.h"
-#include "librbd/AsyncObjectThrottle.h"
-#include "librbd/AioObjectRequest.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
-#include "common/ContextCompletion.h"
-#include "common/dout.h"
-#include "common/errno.h"
-#include "osdc/Striper.h"
-
-#include <boost/bind.hpp>
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
-#include <boost/scope_exit.hpp>
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::TrimRequest: "
-
-namespace librbd
-{
-
-// Per-object operation created by the AsyncObjectThrottle context factory
-// in TrimRequest::send_copyup_objects(): issues an AioObjectTrim request
-// for a single object (logged as "removing (with copyup)").
-class C_CopyupObject : public C_AsyncObjectThrottle<> {
-public:
-  C_CopyupObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
-                 ::SnapContext snapc, uint64_t object_no)
-    : C_AsyncObjectThrottle(throttle, *image_ctx), m_snapc(snapc),
-      m_object_no(object_no)
-  {
-  }
-
-  // Sends the trim request with this context as its completion callback.
-  // Always returns 0.
-  virtual int send() {
-    // the caller must hold owner_lock and, where locking is supported,
-    // own the lock
-    assert(m_image_ctx.owner_lock.is_locked());
-    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-           m_image_ctx.image_watcher->is_lock_owner());
-
-    string oid = m_image_ctx.get_object_name(m_object_no);
-    ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
-
-    AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no,
-                                              m_snapc, this);
-    req->send();
-    return 0;
-  }
-private:
-  ::SnapContext m_snapc;   // snapshot context passed to the trim request
-  uint64_t m_object_no;    // object number handled by this context
-};
-
-// Per-object operation created by the AsyncObjectThrottle context factory
-// in TrimRequest::send_remove_objects(): asynchronously removes a single
-// data object unless the object map reports that it cannot exist.
-class C_RemoveObject : public C_AsyncObjectThrottle<> {
-public:
-  C_RemoveObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
-                 uint64_t object_no)
-    : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no)
-  {
-  }
-
-  // Returns 1 without issuing any I/O when the object map says the object
-  // cannot exist; otherwise queues an aio_remove and returns 0.
-  // NOTE(review): how the throttle interprets 0 vs. 1 is defined by
-  // C_AsyncObjectThrottle, which is not shown here.
-  virtual int send() {
-    assert(m_image_ctx.owner_lock.is_locked());
-    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-           m_image_ctx.image_watcher->is_lock_owner());
-    if (!m_image_ctx.object_map.object_may_exist(m_object_no)) {
-      return 1;
-    }
-
-    string oid = m_image_ctx.get_object_name(m_object_no);
-    ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
-
-    // this context is handed to the completion as its callback argument
-    librados::AioCompletion *rados_completion =
-      librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
-    int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
-    assert(r == 0);
-    rados_completion->release();
-    return 0;
-  }
-
-private:
-  uint64_t m_object_no;    // object number handled by this context
-};
-
-// Precomputes the ranges the trim will operate on:
-//  - m_delete_off:   the new size rounded up to a whole stripe period,
-//                    capped at the original size
-//  - m_delete_start: first object number past the last stripe period that
-//                    the new size still touches
-//  - m_num_objects:  number of objects backing the original size
-TrimRequest::TrimRequest(ImageCtx &image_ctx, Context *on_finish,
-                         uint64_t original_size, uint64_t new_size,
-                         ProgressContext &prog_ctx)
-  : AsyncRequest(image_ctx, on_finish), m_new_size(new_size),
-    m_prog_ctx(prog_ctx)
-{
-  uint64_t period = m_image_ctx.get_stripe_period();
-  // number of stripe periods needed to hold new_size (rounded up)
-  uint64_t new_num_periods = ((m_new_size + period - 1) / period);
-  m_delete_off = MIN(new_num_periods * period, original_size);
-  // first object we can delete free and clear
-  m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
-  m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
-
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 10) << this << " trim image " << original_size << " -> "
-                << m_new_size << " periods " << new_num_periods
-                 << " discard to offset " << m_delete_off
-                 << " delete objects " << m_delete_start
-                 << " to " << m_num_objects << dendl;
-}
-
-
-// Completion hook for each step of the trim state machine.
-// Returns true when r is an error or the state is STATE_FINISHED;
-// otherwise starts the step that follows the current state and returns
-// false.
-bool TrimRequest::should_complete(int r)
-{
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
-  if (r < 0) {
-    lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
-    return true;
-  }
-
-  // every send_* helper asserts that owner_lock is held
-  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
-  switch (m_state) {
-  case STATE_COPYUP_OBJECTS:
-    ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
-    send_pre_remove();
-    break;
-
-  case STATE_PRE_REMOVE:
-    ldout(cct, 5) << " PRE_REMOVE" << dendl;
-    send_remove_objects();
-    break;
-
-  case STATE_REMOVE_OBJECTS:
-    ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
-    send_post_remove();
-    break;
-
-  case STATE_POST_REMOVE:
-    // NOTE(review): the label logged here is " POST_OBJECTS" although the
-    // state is STATE_POST_REMOVE
-    ldout(cct, 5) << " POST_OBJECTS" << dendl;
-    send_clean_boundary();
-    break;
-
-  case STATE_CLEAN_BOUNDARY:
-    ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
-    // finish() moves to STATE_FINISHED and calls async_complete(0)
-    finish(0);
-    break;
-
-  case STATE_FINISHED:
-    ldout(cct, 5) << "FINISHED" << dendl;
-    return true;
-
-  default:
-    lderr(cct) << "invalid state: " << m_state << dendl;
-    assert(false);
-    break;
-  }
-  return false;
-}
-
-// Starts the trim at the copyup step; send_copyup_objects() itself skips
-// ahead when there is nothing to copy up or remove.
-void TrimRequest::send() {
-  send_copyup_objects();
-}
-
-// Issues a C_CopyupObject for every object in [m_delete_start, copyup_end),
-// where copyup_end is the number of objects covered by the parent overlap,
-// then advances m_delete_start to copyup_end so the later remove steps
-// skip those objects.
-// Skips straight to send_clean_boundary() when there are no whole objects
-// to delete, and to send_pre_remove() when the parent overlap does not
-// reach the deleted range or the image has no snapshots.
-void TrimRequest::send_copyup_objects() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-
-  if (m_delete_start >= m_num_objects) {
-    send_clean_boundary();
-    return;
-  }
-
-  ::SnapContext snapc;
-  bool has_snapshots;
-  uint64_t parent_overlap;
-  {
-    // snapshot the state needed below while holding both read locks
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
-
-    snapc = m_image_ctx.snapc;
-    has_snapshots = !m_image_ctx.snaps.empty();
-    int r = m_image_ctx.get_parent_overlap(m_image_ctx.get_copyup_snap_id(),
-                                           &parent_overlap);
-    assert(r == 0);
-  }
-
-  // copyup is only required for portion of image that overlaps parent
-  uint64_t copyup_end = Striper::get_num_objects(m_image_ctx.layout,
-                                                 parent_overlap);
-  // TODO: protect against concurrent shrink and snap create?
-  if (copyup_end <= m_delete_start || !has_snapshots) {
-    send_pre_remove();
-    return;
-  }
-
-  // objects in [copyup_start, copyup_end) are handled here; plain removal
-  // later starts at copyup_end
-  uint64_t copyup_start = m_delete_start;
-  m_delete_start = copyup_end;
-
-  ldout(m_image_ctx.cct, 5) << this << " send_copyup_objects: "
-                           << " start object=" << copyup_start << ", "
-                           << " end object=" << copyup_end << dendl;
-  m_state = STATE_COPYUP_OBJECTS;
-
-  Context *ctx = create_callback_context();
-  // factory: (throttle, object_no) -> new C_CopyupObject(throttle,
-  // &m_image_ctx, snapc, object_no)
-  AsyncObjectThrottle<>::ContextFactory context_factory(
-    boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject>(),
-      boost::lambda::_1, &m_image_ctx, snapc, boost::lambda::_2));
-  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
-    this, m_image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
-    copyup_end);
-  throttle->start_ops(m_image_ctx.concurrent_management_ops);
-}
-
-// Enters STATE_REMOVE_OBJECTS and starts a throttle that creates one
-// C_RemoveObject per object number, passing m_delete_start and
-// m_num_objects as the range bounds.
-void TrimRequest::send_remove_objects() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
-                           << " delete_start=" << m_delete_start
-                           << " num_objects=" << m_num_objects << dendl;
-  m_state = STATE_REMOVE_OBJECTS;
-
-  Context *ctx = create_callback_context();
-  // factory: (throttle, object_no) -> new C_RemoveObject(throttle,
-  // &m_image_ctx, object_no)
-  AsyncObjectThrottle<>::ContextFactory context_factory(
-    boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject>(),
-      boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
-  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
-    this, m_image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start,
-    m_num_objects);
-  throttle->start_ops(m_image_ctx.concurrent_management_ops);
-}
-
-// When the object map is enabled, enters STATE_PRE_REMOVE and asks the
-// object map to move the objects being deleted from OBJECT_EXISTS to
-// OBJECT_PENDING. Falls through to send_remove_objects() when the object
-// map is disabled or aio_update() reports that no update was needed, and
-// to send_clean_boundary() when there are no whole objects to delete.
-void TrimRequest::send_pre_remove() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  if (m_delete_start >= m_num_objects) {
-    send_clean_boundary();
-    return;
-  }
-
-  bool remove_objects = false;
-  {
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    if (!m_image_ctx.object_map.enabled()) {
-      remove_objects = true;
-    } else {
-      ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
-                               << " delete_start=" << m_delete_start
-                               << " num_objects=" << m_num_objects << dendl;
-      m_state = STATE_PRE_REMOVE;
-
-      assert(m_image_ctx.image_watcher->is_lock_owner());
-
-      // flag the objects as pending deletion
-      Context *ctx = create_callback_context();
-      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
-      if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
-                                            OBJECT_PENDING, OBJECT_EXISTS,
-                                             ctx)) {
-        // the callback context was not consumed, so free it here
-        delete ctx;
-        remove_objects = true;
-      }
-    }
-  }
-
-  // avoid possible recursive lock attempts
-  // (the next step is only started after the locks above are released)
-  if (remove_objects) {
-    // no object map update required
-    send_remove_objects();
-  }
-}
-
-// When the object map is enabled, enters STATE_POST_REMOVE and asks the
-// object map to move the deleted objects from OBJECT_PENDING to
-// OBJECT_NONEXISTENT. Falls through to send_clean_boundary() when the
-// object map is disabled or aio_update() reports that no update was
-// needed.
-void TrimRequest::send_post_remove() {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  bool clean_boundary = false;
-  {
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    if (!m_image_ctx.object_map.enabled()) {
-      clean_boundary = true;
-    } else {
-      ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
-                               << " delete_start=" << m_delete_start
-                               << " num_objects=" << m_num_objects << dendl;
-      m_state = STATE_POST_REMOVE;
-
-      assert(m_image_ctx.image_watcher->is_lock_owner());
-
-      // flag the pending objects as removed
-      Context *ctx = create_callback_context();
-      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
-      if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
-                                            OBJECT_NONEXISTENT,
-                                            OBJECT_PENDING, ctx)) {
-        // the callback context was not consumed, so free it here
-        delete ctx;
-       clean_boundary = true;
-      }
-    }
-  }
-
-  // avoid possible recursive lock attempts
-  // (the next step is only started after the locks above are released)
-  if (clean_boundary) {
-    // no object map update required
-    send_clean_boundary();
-  }
-}
-
-// Discards the byte range [m_new_size, m_delete_off), i.e. the data past
-// the new size that was not covered by whole-object removal. Finishes
-// immediately when that range is empty. Each object extent in the range is
-// either trimmed (extent starts at object offset 0) or truncated at the
-// extent's offset.
-void TrimRequest::send_clean_boundary() {
-  assert(m_image_ctx.owner_lock.is_locked());
-  CephContext *cct = m_image_ctx.cct;
-  if (m_delete_off <= m_new_size) {
-    finish(0);
-    return;
-  }
-
-  // should have been canceled prior to releasing lock
-  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
-         m_image_ctx.image_watcher->is_lock_owner());
-  uint64_t delete_len = m_delete_off - m_new_size;
-  ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
-                           << " delete_off=" << m_delete_off
-                           << " length=" << delete_len << dendl;
-  m_state = STATE_CLEAN_BOUNDARY;
-
-  ::SnapContext snapc;
-  {
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    snapc = m_image_ctx.snapc;
-  }
-
-  // discard the weird boundary
-  std::vector<ObjectExtent> extents;
-  Striper::file_to_extents(cct, m_image_ctx.format_string,
-                          &m_image_ctx.layout, m_new_size, delete_len, 0,
-                           extents);
-
-  // one shared completion; every per-extent request below is attached to it
-  ContextCompletion *completion =
-    new ContextCompletion(create_callback_context(), true);
-  for (vector<ObjectExtent>::iterator p = extents.begin();
-       p != extents.end(); ++p) {
-    ldout(cct, 20) << " ex " << *p << dendl;
-    Context *req_comp = new C_ContextCompletion(*completion);
-
-    AioObjectRequest *req;
-    if (p->offset == 0) {
-      // the extent starts at the beginning of the object: trim the object
-      req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc,
-                              req_comp);
-    } else {
-      // keep the leading part of the object and truncate at the offset
-      req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno,
-                                  p->offset, snapc, req_comp);
-    }
-    req->send();
-  }
-  completion->finish_adding_requests();
-}
-
-// Marks the state machine as finished and hands the result r to
-// async_complete(); should_complete() returns true once it observes
-// STATE_FINISHED.
-void TrimRequest::finish(int r) {
-  m_state = STATE_FINISHED;
-  async_complete(r);
-}
-
-} // namespace librbd
diff --git a/src/librbd/TrimRequest.h b/src/librbd/TrimRequest.h
deleted file mode 100644 (file)
index 223c600..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_TRIM_REQUEST_H
-#define CEPH_LIBRBD_TRIM_REQUEST_H
-
-#include "librbd/AsyncRequest.h"
-
-namespace librbd
-{
-
-class ImageCtx;
-class ProgressContext;
-
-// Asynchronous request that discards image data beyond a new size, driving
-// the state machine documented below.
-class TrimRequest : public AsyncRequest<>
-{
-public:
-  TrimRequest(ImageCtx &image_ctx, Context *on_finish,
-                  uint64_t original_size, uint64_t new_size,
-                  ProgressContext &prog_ctx);
-
-  virtual void send();
-
-protected:
-  /**
-   * Trim goes through the following state machine to remove whole objects,
-   * clean partially trimmed objects, and update the object map:
-   *
-   * @verbatim
-   *
-   *     <start> . . . . > STATE_FINISHED . . . . . . . . .
-   *      |   .                                           .
-   *      |   . . . . . . . . . . . .                     .
-   *      |                         .                     .
-   *      v                         .                     .
-   * STATE_COPYUP_OBJECTS . . .     .                     .
-   *      |                   .     .                     .
-   *      |                   .     .                     .
-   *      v                   v     v                     .
-   * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS           .
-   *                                |   .   .             .
-   *        /-----------------------/   .   . . . . . .   .
-   *        |                           .             .   .
-   *        v                           v             v   v
-   * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
-   *        .                                           ^
-   *        .                                           .
-   *        . . . . . . . . . . . . . . . . . . . . . . .
-   *
-   * @endverbatim
-   *
-   * The _COPYUP_OBJECTS state is skipped if the parent overlap does not
-   * extend into the objects being removed or the image does not have any
-   * snapshots.
-   * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
-   * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects
-   * are removed.  The _CLEAN_BOUNDARY state is skipped if no boundary
-   * objects are cleaned.  The state machine will immediately transition
-   * to _FINISHED state if there are no bytes to trim.
-   */
-
-  enum State {
-    STATE_COPYUP_OBJECTS,
-    STATE_PRE_REMOVE,
-    STATE_REMOVE_OBJECTS,
-    STATE_POST_REMOVE,
-    STATE_CLEAN_BOUNDARY,
-    STATE_FINISHED
-  };
-
-  virtual bool should_complete(int r);
-
-  State m_state;              // current position in the state machine
-
-private:
-  uint64_t m_delete_start;    // first object number to remove
-  uint64_t m_num_objects;     // object count backing the original size
-  uint64_t m_delete_off;      // end offset of the boundary range to discard
-  uint64_t m_new_size;        // size the image is being trimmed to
-  ProgressContext &m_prog_ctx;
-
-  // one send_* helper per state of the state machine above
-  void send_copyup_objects();
-  void send_remove_objects();
-  void send_pre_remove();
-  void send_post_remove();
-  void send_clean_boundary();
-  void finish(int r);
-};
-
-} // namespace librbd
-
-#endif // CEPH_LIBRBD_TRIM_REQUEST_H
index 704252bba2b033e6c63df1c9f83195da0d8bf91a..2cd85bdc36fe2c472e700d1bbf251190b394e09b 100644 (file)
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
-#include "librbd/FlattenRequest.h"
 #include "librbd/Journal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/parent_types.h"
-#include "librbd/RebuildObjectMapRequest.h"
-#include "librbd/ResizeRequest.h"
-#include "librbd/TrimRequest.h"
+#include "librbd/operation/FlattenRequest.h"
+#include "librbd/operation/RebuildObjectMapRequest.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/operation/TrimRequest.h"
 #include "include/util.h"
 
 #include <boost/bind.hpp>
@@ -326,8 +326,8 @@ int invoke_async_request(ImageCtx *ictx, const std::string& request_type,
 
     C_SaferCond ctx;
     ictx->snap_lock.get_read();
-    TrimRequest *req = new TrimRequest(*ictx, &ctx, ictx->size, newsize,
-                                       prog_ctx);
+    operation::TrimRequest *req = new operation::TrimRequest(
+      *ictx, &ctx, ictx->size, newsize, prog_ctx);
     ictx->snap_lock.put_read();
     req->send();
 
@@ -2335,7 +2335,8 @@ reprotect_and_return_err:
                            ProgressContext& prog_ctx)
   {
     assert(ictx->owner_lock.is_locked());
-    ResizeRequest *req = new ResizeRequest(*ictx, ctx, new_size, prog_ctx);
+    operation::ResizeRequest *req = new operation::ResizeRequest(
+      *ictx, ctx, new_size, prog_ctx);
     req->send();
   }
 
@@ -3338,8 +3339,8 @@ reprotect_and_return_err:
       overlap_objects = Striper::get_num_objects(ictx->layout, overlap);
     }
 
-    FlattenRequest *req = new FlattenRequest(*ictx, ctx, object_size,
-                                             overlap_objects, snapc, prog_ctx);
+    operation::FlattenRequest *req = new operation::FlattenRequest(
+      *ictx, ctx, object_size, overlap_objects, snapc, prog_ctx);
     req->send();
     return 0;
   }
@@ -3389,8 +3390,8 @@ reprotect_and_return_err:
       return r;
     }
 
-    RebuildObjectMapRequest *req = new RebuildObjectMapRequest(*ictx, ctx,
-                                                               prog_ctx);
+    operation::RebuildObjectMapRequest *req =
+      new operation::RebuildObjectMapRequest(*ictx, ctx, prog_ctx);
     req->send();
     return 0;
   }
diff --git a/src/librbd/operation/FlattenRequest.cc b/src/librbd/operation/FlattenRequest.cc
new file mode 100644 (file)
index 0000000..e3d9181
--- /dev/null
@@ -0,0 +1,178 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/FlattenRequest.h"
+#include "librbd/AioObjectRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/ObjectMap.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::FlattenRequest: "
+
+namespace librbd {
+namespace operation {
+
+// Per-object operation created by the AsyncObjectThrottle context factory
+// in FlattenRequest::send(): issues an AioObjectWrite with an empty buffer
+// at offset 0 for a single object.
+// NOTE(review): presumably the empty write is what triggers the copyup
+// from the parent; that logic lives in AioObjectWrite -- confirm.
+class C_FlattenObject : public C_AsyncObjectThrottle<> {
+public:
+  C_FlattenObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+                  uint64_t object_size, ::SnapContext snapc, uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_size(object_size),
+      m_snapc(snapc), m_object_no(object_no)
+  {
+  }
+
+  // Returns -ERESTART if locking is supported but this client is no longer
+  // the lock owner, 1 if the request reports no parent (nothing is sent),
+  // and 0 after the write request has been sent.
+  // NOTE(review): how the throttle interprets these return values is
+  // defined by C_AsyncObjectThrottle, which is not shown here.
+  virtual int send() {
+    assert(m_image_ctx.owner_lock.is_locked());
+    CephContext *cct = m_image_ctx.cct;
+
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+        !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
+      return -ERESTART;
+    }
+
+    bufferlist bl;
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    AioObjectWrite *req = new AioObjectWrite(&m_image_ctx, oid, m_object_no, 0,
+                                             bl, m_snapc, this);
+    if (!req->has_parent()) {
+      // stop early if the parent went away - it just means
+      // another flatten finished first or the image was resized
+      delete req;
+      return 1;
+    }
+
+    req->send();
+    return 0;
+  }
+
+private:
+  uint64_t m_object_size;  // stored by the constructor; not read in this class
+  ::SnapContext m_snapc;   // snapshot context passed to the write request
+  uint64_t m_object_no;    // object number handled by this context
+};
+
+// Completion hook for each step of the flatten state machine.
+// Returns true on error, or when the step started for the current state
+// reports that nothing remains to be done; returns false when a follow-up
+// operation was issued. -ENOENT is not treated as an error once
+// m_ignore_enoent has been set by send_update_header().
+bool FlattenRequest::should_complete(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+  if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) {
+    lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+
+  // the send_* helpers assert that owner_lock is held
+  RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+  switch (m_state) {
+  case STATE_FLATTEN_OBJECTS:
+    ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl;
+    return send_update_header();
+
+  case STATE_UPDATE_HEADER:
+    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+    return send_update_children();
+
+  case STATE_UPDATE_CHILDREN:
+    ldout(cct, 5) << "UPDATE_CHILDREN" << dendl;
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+// Enters STATE_FLATTEN_OBJECTS and starts a throttle that creates one
+// C_FlattenObject per object number, passing 0 and m_overlap_objects as
+// the range bounds. The caller must hold owner_lock.
+void FlattenRequest::send() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " send" << dendl;
+
+  m_state = STATE_FLATTEN_OBJECTS;
+  // factory: (throttle, object_no) -> new C_FlattenObject(throttle,
+  // &m_image_ctx, m_object_size, m_snapc, object_no)
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_FlattenObject>(),
+      boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc,
+      boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
+    0, m_overlap_objects);
+  throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+// Enters STATE_UPDATE_HEADER and issues an asynchronous header update that
+// removes the parent reference from this image.
+// Returns true (nothing to do) if the image no longer has a parent;
+// returns false after the update has been queued. The parent spec is saved
+// in m_parent_spec for the later children update.
+bool FlattenRequest::send_update_header() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  ldout(cct, 5) << this << " send_update_header" << dendl;
+  m_state = STATE_UPDATE_HEADER;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  {
+    RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+    // stop early if the parent went away - it just means
+    // another flatten finished first, so this one is useless.
+    if (!m_image_ctx.parent) {
+      ldout(cct, 5) << "image already flattened" << dendl;
+      return true;
+    }
+    m_parent_spec = m_image_ctx.parent_md.spec;
+  }
+  // from here on should_complete() no longer treats -ENOENT as an error
+  m_ignore_enoent = true;
+
+  // remove parent from this (base) image
+  librados::ObjectWriteOperation op;
+  if (m_image_ctx.image_watcher->is_lock_supported()) {
+    m_image_ctx.image_watcher->assert_header_locked(&op);
+  }
+  cls_client::remove_parent(&op);
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                        rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+  return false;
+}
+
+// Enters STATE_UPDATE_CHILDREN and issues an asynchronous operation on the
+// RBD_CHILDREN object that removes this image from the children of
+// m_parent_spec.
+// Returns true (step skipped) when the image has snapshots and the
+// deep-flatten feature is not enabled; returns false after the update has
+// been queued.
+bool FlattenRequest::send_update_children() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  // if there are no snaps, remove from the children object as well
+  // (if snapshots remain, they have their own parent info, and the child
+  // will be removed when the last snap goes away)
+  // with RBD_FEATURE_DEEP_FLATTEN set, the child is removed even if
+  // snapshots exist
+  RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+  if ((m_image_ctx.features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
+      !m_image_ctx.snaps.empty()) {
+    return true;
+  }
+
+  ldout(cct, 2) << "removing child from children list..." << dendl;
+  m_state = STATE_UPDATE_CHILDREN;
+
+  librados::ObjectWriteOperation op;
+  cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id);
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion,
+                                    &op);
+  assert(r == 0);
+  rados_completion->release();
+  return false;
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/FlattenRequest.h b/src/librbd/operation/FlattenRequest.h
new file mode 100644 (file)
index 0000000..8474f9c
--- /dev/null
@@ -0,0 +1,84 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "librbd/parent_types.h"
+#include "common/snap_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+// Asynchronous request that flattens a cloned image, driving the state
+// machine documented below.
+class FlattenRequest : public AsyncRequest<>
+{
+public:
+  // m_state is not set here; send() assigns it before any step runs
+  FlattenRequest(ImageCtx &image_ctx, Context *on_finish,
+                     uint64_t object_size, uint64_t overlap_objects,
+                     const ::SnapContext &snapc, ProgressContext &prog_ctx)
+    : AsyncRequest(image_ctx, on_finish), m_object_size(object_size),
+      m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx),
+      m_ignore_enoent(false)
+  {
+  }
+
+  virtual void send();
+
+protected:
+  virtual bool should_complete(int r);
+
+private:
+  /**
+   * Flatten goes through the following state machine to copyup objects
+   * from the parent image:
+   *
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . .
+   *           .                         |                  .
+   *           .                         |                  .
+   *           .                         v                  .
+   *           .               STATE_UPDATE_CHILDREN        .
+   *           .                         |                  .
+   *           .                         |                  .
+   *           .                         \---> <finish> < . .
+   *           .                                   ^
+   *           .                                   .
+   *           . . . . . . . . . . . . . . . . . . .
+   *
+   * @endverbatim
+   *
+   * The _UPDATE_CHILDREN state will be skipped if the image has one or
+   * more snapshots and the deep-flatten feature is not enabled. The
+   * _UPDATE_HEADER state will be skipped if the image was concurrently
+   * flattened by another client.
+   */
+  enum State {
+    STATE_FLATTEN_OBJECTS,
+    STATE_UPDATE_HEADER,
+    STATE_UPDATE_CHILDREN
+  };
+
+  uint64_t m_object_size;       // forwarded to each C_FlattenObject
+  uint64_t m_overlap_objects;   // upper bound of the object range to flatten
+  ::SnapContext m_snapc;        // snapshot context used for the object writes
+  ProgressContext &m_prog_ctx;
+  State m_state;                // current position in the state machine
+
+  parent_spec m_parent_spec;    // captured in send_update_header()
+  bool m_ignore_enoent;         // when true, -ENOENT is not treated as an error
+
+  bool send_update_header();
+  bool send_update_children();
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_FLATTEN_REQUEST_H
diff --git a/src/librbd/operation/RebuildObjectMapRequest.cc b/src/librbd/operation/RebuildObjectMapRequest.cc
new file mode 100644 (file)
index 0000000..803083c
--- /dev/null
@@ -0,0 +1,357 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/RebuildObjectMapRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/operation/TrimRequest.h"
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::RebuildObjectMapRequest: "
+
+namespace librbd {
+namespace operation {
+
+namespace {
+
+class C_VerifyObject : public C_AsyncObjectThrottle<> {
+public:
+  C_VerifyObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+                 uint64_t snap_id, uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle, *image_ctx), m_snap_id(snap_id),
+      m_object_no(object_no), m_oid(m_image_ctx.get_object_name(m_object_no))
+  {
+    m_io_ctx.dup(m_image_ctx.md_ctx);
+    m_io_ctx.snap_set_read(CEPH_SNAPDIR);
+  }
+
+  virtual void complete(int r) {
+    if (should_complete(r)) {
+      ldout(m_image_ctx.cct, 20) << m_oid << " C_VerifyObject completed "
+                                 << dendl;
+      finish(r);
+      delete this;
+    }
+  }
+
+  virtual int send() {
+    send_list_snaps();
+    return 0;
+  }
+
+private:
+  librados::IoCtx m_io_ctx;
+  uint64_t m_snap_id;
+  uint64_t m_object_no;
+  std::string m_oid;
+
+  librados::snap_set_t m_snap_set;
+  int m_snap_list_ret;
+
+  bool should_complete(int r) {
+    CephContext *cct = m_image_ctx.cct;
+    if (r == 0) {
+      r = m_snap_list_ret;
+    }
+    if (r < 0 && r != -ENOENT) {
+      lderr(cct) << m_oid << " C_VerifyObject::should_complete: "
+                 << "encountered an error: " << cpp_strerror(r) << dendl;
+      return true;
+    }
+
+    ldout(cct, 20) << m_oid << " C_VerifyObject::should_complete: " << " r="
+                   << r << dendl;
+    return update_object_map(get_object_state());
+  }
+
+  void send_list_snaps() {
+    assert(m_image_ctx.owner_lock.is_locked());
+    ldout(m_image_ctx.cct, 5) << m_oid << " C_VerifyObject::send_list_snaps"
+                              << dendl;
+
+    librados::AioCompletion *comp = librados::Rados::aio_create_completion(
+      this, NULL, rados_ctx_cb);
+
+    librados::ObjectReadOperation op;
+    op.list_snaps(&m_snap_set, &m_snap_list_ret);
+
+    int r = m_io_ctx.aio_operate(m_oid, comp, &op, NULL);
+    assert(r == 0);
+    comp->release();
+  }
+
+  uint8_t get_object_state() {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    for (std::vector<librados::clone_info_t>::const_iterator r =
+           m_snap_set.clones.begin(); r != m_snap_set.clones.end(); ++r) {
+      librados::snap_t from_snap_id;
+      librados::snap_t to_snap_id;
+      if (r->cloneid == librados::SNAP_HEAD) {
+        from_snap_id = next_valid_snap_id(m_snap_set.seq + 1);
+        to_snap_id = librados::SNAP_HEAD;
+      } else {
+        from_snap_id = next_valid_snap_id(r->snaps[0]);
+        to_snap_id = r->snaps[r->snaps.size()-1];
+      }
+
+      if (to_snap_id < m_snap_id) {
+        continue;
+      } else if (m_snap_id < from_snap_id) {
+        break;
+      }
+
+      if ((m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0 &&
+          from_snap_id != m_snap_id) {
+        return OBJECT_EXISTS_CLEAN;
+      }
+      return OBJECT_EXISTS;
+    }
+    return OBJECT_NONEXISTENT;
+  }
+
+  uint64_t next_valid_snap_id(uint64_t snap_id) {
+    assert(m_image_ctx.snap_lock.is_locked());
+
+    std::map<librados::snap_t, SnapInfo>::iterator it =
+      m_image_ctx.snap_info.lower_bound(snap_id);
+    if (it == m_image_ctx.snap_info.end()) {
+      return CEPH_NOSNAP;
+    }
+    return it->first;
+  }
+
+  bool update_object_map(uint8_t new_state) {
+    RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+    CephContext *cct = m_image_ctx.cct;
+
+    // should have been canceled prior to releasing lock
+    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+           m_image_ctx.image_watcher->is_lock_owner());
+
+    RWLock::WLocker l(m_image_ctx.object_map_lock);
+    uint8_t state = m_image_ctx.object_map[m_object_no];
+    if (state == OBJECT_EXISTS && new_state == OBJECT_NONEXISTENT &&
+        m_snap_id == CEPH_NOSNAP) {
+      // might be writing object to OSD concurrently
+      new_state = state;
+    }
+
+    if (new_state != state) {
+      ldout(cct, 15) << m_oid << " C_VerifyObject::update_object_map "
+                     << static_cast<uint32_t>(state) << "->"
+                     << static_cast<uint32_t>(new_state) << dendl;
+      m_image_ctx.object_map[m_object_no] = new_state;
+    }
+    return true;
+  }
+};
+
+} // anonymous namespace
+
+
+// Entry point of the rebuild state machine: begins with the object map
+// resize step.
+void RebuildObjectMapRequest::send() {
+  send_resize_object_map();
+}
+
+// Advances the state machine after the step recorded in m_state finished
+// with result r. Returns true when the request is done (the header update
+// succeeded, or a step failed) and false when another step was started.
+bool RebuildObjectMapRequest::should_complete(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+  // the send_* helpers invoked below expect owner_lock to be held
+  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+  switch (m_state) {
+  case STATE_RESIZE_OBJECT_MAP:
+    ldout(cct, 5) << "RESIZE_OBJECT_MAP" << dendl;
+    if (r == -ESTALE && !m_attempted_trim) {
+      // objects are still flagged as in-use -- delete them
+      // (attempted at most once; a second -ESTALE is reported as an error)
+      m_attempted_trim = true;
+      send_trim_image();
+      return false;
+    } else if (r == 0) {
+      send_verify_objects();
+    }
+    break;
+
+  case STATE_TRIM_IMAGE:
+    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+    if (r == 0) {
+      // retry the resize now that the trim has completed
+      send_resize_object_map();
+    }
+    break;
+
+  case STATE_VERIFY_OBJECTS:
+    ldout(cct, 5) << "VERIFY_OBJECTS" << dendl;
+    if (r == 0) {
+      send_save_object_map();
+    }
+    break;
+
+  case STATE_SAVE_OBJECT_MAP:
+    ldout(cct, 5) << "SAVE_OBJECT_MAP" << dendl;
+    if (r == 0) {
+      send_update_header();
+    }
+    break;
+  case STATE_UPDATE_HEADER:
+    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+    if (r == 0) {
+      // final step succeeded
+      return true;
+    }
+    break;
+
+  default:
+    assert(false);
+    break;
+  }
+
+  // any failed step terminates the request
+  if (r < 0) {
+    lderr(cct) << "rebuild object map encountered an error: " << cpp_strerror(r)
+               << dendl;
+    return true;
+  }
+  return false;
+}
+
+void RebuildObjectMapRequest::send_resize_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  uint64_t num_objects;
+  uint64_t size;
+  {
+    RWLock::RLocker l(m_image_ctx.snap_lock);
+    size = get_image_size();
+    num_objects = Striper::get_num_objects(m_image_ctx.layout, size);
+  }
+
+  if (m_image_ctx.object_map.size() == num_objects) {
+    send_verify_objects();
+    return;
+  }
+
+  ldout(cct, 5) << this << " send_resize_object_map" << dendl;
+  m_state = STATE_RESIZE_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+  m_image_ctx.object_map.aio_resize(size, OBJECT_NONEXISTENT,
+                                    create_callback_context());
+}
+
+void RebuildObjectMapRequest::send_trim_image() {
+  CephContext *cct = m_image_ctx.cct;
+
+  RWLock::RLocker l(m_image_ctx.owner_lock);
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+  ldout(cct, 5) << this << " send_trim_image" << dendl;
+  m_state = STATE_TRIM_IMAGE;
+
+  uint64_t new_size;
+  uint64_t orig_size;
+  {
+    RWLock::RLocker l(m_image_ctx.snap_lock);
+    new_size = get_image_size();
+    orig_size = m_image_ctx.get_object_size() *
+                m_image_ctx.object_map.size();
+  }
+  TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
+                                     orig_size, new_size, m_prog_ctx);
+  req->send();
+}
+
+void RebuildObjectMapRequest::send_verify_objects() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  uint64_t snap_id;
+  uint64_t num_objects;
+  {
+    RWLock::RLocker l(m_image_ctx.snap_lock);
+    snap_id = m_image_ctx.snap_id;
+    num_objects = Striper::get_num_objects(m_image_ctx.layout,
+                                           m_image_ctx.get_image_size(snap_id));
+  }
+
+  if (num_objects == 0) {
+    send_save_object_map();
+    return;
+  }
+
+  m_state = STATE_VERIFY_OBJECTS;
+  ldout(cct, 5) << this << " send_verify_objects" << dendl;
+
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_VerifyObject>(),
+      boost::lambda::_1, &m_image_ctx, snap_id, boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, create_callback_context(), &m_prog_ctx,
+    0, num_objects);
+  throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+// Asynchronously saves the in-memory object map via
+// ObjectMap::aio_save(). Caller must hold owner_lock.
+void RebuildObjectMapRequest::send_save_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+
+  ldout(cct, 5) << this << " send_save_object_map" << dendl;
+  m_state = STATE_SAVE_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  Context *on_saved = create_callback_context();
+  m_image_ctx.object_map.aio_save(on_saved);
+}
+
+// Final step: clears the OBJECT_MAP_INVALID and FAST_DIFF_INVALID flags for
+// the current snapshot id, both in the image header (asynchronously) and in
+// the in-memory ImageCtx. Caller must hold owner_lock.
+void RebuildObjectMapRequest::send_update_header() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  ldout(m_image_ctx.cct, 5) << this << " send_update_header" << dendl;
+  m_state = STATE_UPDATE_HEADER;
+
+  librados::ObjectWriteOperation op;
+  if (m_image_ctx.image_watcher->is_lock_supported()) {
+    // guard the header write on the header lock
+    m_image_ctx.image_watcher->assert_header_locked(&op);
+  }
+
+  // set_flags(op, snap_id, flags=0, mask=flags): clear both flag bits
+  uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID;
+  cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags);
+
+  librados::AioCompletion *comp = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid, comp, &op);
+  assert(r == 0);
+  comp->release();
+
+  // NOTE(review): members are still accessed after the async operation was
+  // submitted -- confirm the completion cannot destroy this request before
+  // the in-memory flag update below has finished.
+  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+  m_image_ctx.update_flags(m_image_ctx.snap_id, flags, false);
+}
+
+// Returns the image size the object map should be sized for. For a
+// snapshot this is the snapshot's size; for the HEAD revision it is the
+// target size of the front queued resize request if there is one,
+// otherwise the current image size. Caller must hold snap_lock.
+uint64_t RebuildObjectMapRequest::get_image_size() const {
+  assert(m_image_ctx.snap_lock.is_locked());
+
+  if (m_image_ctx.snap_id != CEPH_NOSNAP) {
+    return m_image_ctx.get_image_size(m_image_ctx.snap_id);
+  }
+  if (m_image_ctx.resize_reqs.empty()) {
+    return m_image_ctx.size;
+  }
+  return m_image_ctx.resize_reqs.front()->get_image_size();
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/RebuildObjectMapRequest.h b/src/librbd/operation/RebuildObjectMapRequest.h
new file mode 100644 (file)
index 0000000..3ed49ba
--- /dev/null
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/AsyncRequest.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+/**
+ * Asynchronous request that rebuilds the image's object map: it resizes the
+ * map if needed, verifies every object, saves the map and finally updates
+ * the image header flags.
+ */
+class RebuildObjectMapRequest : public AsyncRequest<> {
+public:
+
+  /**
+   * @param image_ctx image whose object map is rebuilt
+   * @param on_finish context passed on to the AsyncRequest base
+   * @param prog_ctx progress context handed to the trim and verify steps
+   */
+  RebuildObjectMapRequest(ImageCtx &image_ctx, Context *on_finish,
+                          ProgressContext &prog_ctx)
+    : AsyncRequest(image_ctx, on_finish), m_image_ctx(image_ctx),
+      m_prog_ctx(prog_ctx), m_attempted_trim(false)
+  {
+  }
+
+  virtual void send();
+
+protected:
+  virtual bool should_complete(int r);
+
+private:
+  /**
+   * Rebuild object map goes through the following state machine to
+   * verify per-object state:
+   *
+   * @verbatim
+   *
+   * <start>
+   *  .   |               . . . . . . . . . .
+   *  .   |               .                 .
+   *  .   v               v                 .
+   *  . STATE_RESIZE_OBJECT_MAP . . . > STATE_TRIM_IMAGE
+   *  .          |
+   *  .          v
+   *  . . . > STATE_VERIFY_OBJECTS
+   *             |
+   *             v
+   *          STATE_SAVE_OBJECT_MAP
+   *             |
+   *             v
+   *          STATE_UPDATE_HEADER
+   *
+   * @endverbatim
+   *
+   * The _RESIZE_OBJECT_MAP state will be skipped if the object map
+   * is appropriately sized for the image. The _TRIM_IMAGE state will
+   * only be hit if the resize failed due to an in-use object.
+   */
+  enum State {
+    STATE_RESIZE_OBJECT_MAP,
+    STATE_TRIM_IMAGE,
+    STATE_VERIFY_OBJECTS,
+    STATE_SAVE_OBJECT_MAP,
+    STATE_UPDATE_HEADER
+  };
+
+  ImageCtx &m_image_ctx;
+  ProgressContext &m_prog_ctx;
+  State m_state;           // not set by the constructor; assigned by send_*
+  bool m_attempted_trim;   // true once a trim has been tried after -ESTALE
+
+  void send_resize_object_map();
+  void send_trim_image();
+  void send_verify_objects();
+  void send_save_object_map();
+  void send_update_header();
+
+  // image size to size the object map for; requires snap_lock
+  uint64_t get_image_size() const;
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_REBUILD_OBJECT_MAP_REQUEST_H
diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc
new file mode 100644 (file)
index 0000000..0324743
--- /dev/null
@@ -0,0 +1,267 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/ResizeRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/operation/TrimRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ResizeRequest: "
+
+namespace librbd {
+namespace operation {
+
+ResizeRequest::ResizeRequest(ImageCtx &image_ctx, Context *on_finish,
+                                       uint64_t new_size,
+                                       ProgressContext &prog_ctx)
+  : AsyncRequest(image_ctx, on_finish),
+    m_original_size(0), m_new_size(new_size),
+    m_prog_ctx(prog_ctx), m_new_parent_overlap(0),
+    m_xlist_item(this)
+{
+}
+
+// Unlinks this request from the image's resize queue and, if another
+// resize request is now at the front of the queue, starts it.
+ResizeRequest::~ResizeRequest() {
+  ResizeRequest *next_req = NULL;
+  {
+    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+    // keep the side effect outside of assert() so the request is still
+    // removed from the queue if assertions are ever compiled out
+    bool removed = m_xlist_item.remove_myself();
+    assert(removed);
+    (void)removed;
+    if (!m_image_ctx.resize_reqs.empty()) {
+      next_req = m_image_ctx.resize_reqs.front();
+    }
+  }
+
+  if (next_req != NULL) {
+    // send() requires owner_lock to be held
+    RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+    next_req->send();
+  }
+}
+
+// Advances the resize state machine after the step recorded in m_state
+// finished with result r. Returns true when the request is done (error,
+// no-op resize, or final step completed) and false when another step was
+// started.
+bool ResizeRequest::should_complete(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+  // any failure ends the request; m_state is not consulted in that case
+  if (r < 0) {
+    lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+  if (m_state == STATE_FINISHED) {
+    ldout(cct, 5) << "FINISHED" << dendl;
+    return true;
+  }
+
+  // the send_* helpers invoked below assert that owner_lock is held
+  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+  switch (m_state) {
+  case STATE_FLUSH:
+    ldout(cct, 5) << "FLUSH" << dendl;
+    send_invalidate_cache();
+    break;
+
+  case STATE_INVALIDATE_CACHE:
+    ldout(cct, 5) << "INVALIDATE_CACHE" << dendl;
+    send_trim_image();
+    break;
+
+  case STATE_TRIM_IMAGE:
+    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+    send_update_header();
+    break;
+
+  case STATE_GROW_OBJECT_MAP:
+    ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
+    send_update_header();
+    break;
+
+  case STATE_UPDATE_HEADER:
+    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+    // send_shrink_object_map() returns true when no shrink was started
+    if (send_shrink_object_map()) {
+      update_size_and_overlap();
+      return true;
+    }
+    break;
+
+  case STATE_SHRINK_OBJECT_MAP:
+    ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
+    update_size_and_overlap();
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+// Queues this request on the image's resize list and, once it is at the
+// front of that list, starts the grow or shrink path (or completes
+// immediately when canceled or when the size is unchanged).
+// Caller must hold owner_lock.
+void ResizeRequest::send() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  {
+    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+    if (!m_xlist_item.is_on_list()) {
+      m_image_ctx.resize_reqs.push_back(&m_xlist_item);
+      if (m_image_ctx.resize_reqs.front() != this) {
+        // another resize is ahead of us; its destructor re-invokes send()
+        // on the request at the front of the queue
+        return;
+      }
+    }
+
+    assert(m_image_ctx.resize_reqs.front() == this);
+    m_original_size = m_image_ctx.size;
+    compute_parent_overlap();
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  if (is_canceled()) {
+    complete(-ERESTART);
+  } else if (m_original_size == m_new_size) {
+    ldout(cct, 2) << this << " no change in size (" << m_original_size
+                 << " -> " << m_new_size << ")" << dendl;
+    m_state = STATE_FINISHED;
+    complete(0);
+  } else if (m_new_size > m_original_size) {
+    ldout(cct, 2) << this << " expanding image (" << m_original_size
+                 << " -> " << m_new_size << ")" << dendl;
+    send_grow_object_map();
+  } else {
+    ldout(cct, 2) << this << " shrinking image (" << m_original_size
+                 << " -> " << m_new_size << ")" << dendl;
+    send_flush();
+  }
+}
+
+void ResizeRequest::send_flush() {
+  ldout(m_image_ctx.cct, 5) << this << " send_flush: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_FLUSH;
+
+  // with clipping adjusted, ensure that write / copy-on-read operations won't
+  // (re-)create objects that we just removed. need async callback to ensure
+  // we don't have cache_lock already held
+  m_image_ctx.flush_async_operations(create_async_callback_context());
+}
+
+void ResizeRequest::send_invalidate_cache() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  ldout(m_image_ctx.cct, 5) << this << " send_invalidate_cache: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_INVALIDATE_CACHE;
+
+  // need to invalidate since we're deleting objects, and
+  // ObjectCacher doesn't track non-existent objects
+  m_image_ctx.invalidate_cache(create_callback_context());
+}
+
+void ResizeRequest::send_trim_image() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_TRIM_IMAGE;
+
+  TrimRequest *req = new TrimRequest(m_image_ctx, create_callback_context(),
+                                    m_original_size, m_new_size, m_prog_ctx);
+  req->send();
+}
+
+void ResizeRequest::send_grow_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  if (!m_image_ctx.object_map.enabled()) {
+    send_update_header();
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_GROW_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
+                                   create_callback_context());
+}
+
+bool ResizeRequest::send_shrink_object_map() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  if (!m_image_ctx.object_map.enabled() || m_new_size > m_original_size) {
+    return true;
+  }
+
+  ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
+                           << " original_size=" << m_original_size
+                           << " new_size=" << m_new_size << dendl;
+  m_state = STATE_SHRINK_OBJECT_MAP;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  m_image_ctx.object_map.aio_resize(m_new_size, OBJECT_NONEXISTENT,
+                                   create_callback_context());
+  return false;
+}
+
+void ResizeRequest::send_update_header() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_UPDATE_HEADER;
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  librados::ObjectWriteOperation op;
+  if (m_image_ctx.old_format) {
+    // rewrite only the size field of the header
+    // NOTE: format 1 image headers are not stored in fixed endian format
+    bufferlist bl;
+    bl.append(reinterpret_cast<const char*>(&m_new_size), sizeof(m_new_size));
+    op.write(offsetof(rbd_obj_header_ondisk, image_size), bl);
+  } else {
+    if (m_image_ctx.image_watcher->is_lock_supported()) {
+      m_image_ctx.image_watcher->assert_header_locked(&op);
+    }
+    cls_client::set_size(&op, m_new_size);
+  }
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                    rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+// Computes the parent overlap that will apply after the resize: zero when
+// the image has no parent, otherwise the smaller of the new size and the
+// current parent overlap.
+void ResizeRequest::compute_parent_overlap() {
+  RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+  if (m_image_ctx.parent != NULL) {
+    m_new_parent_overlap = MIN(m_new_size, m_image_ctx.parent_md.overlap);
+    return;
+  }
+  m_new_parent_overlap = 0;
+}
+
+// Publishes the new size in the in-memory image context and, when a cloned
+// image was shrunk, the recomputed parent overlap.
+void ResizeRequest::update_size_and_overlap() {
+  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+  m_image_ctx.size = m_new_size;
+
+  RWLock::WLocker parent_locker(m_image_ctx.parent_lock);
+  const bool image_shrunk = (m_new_size < m_original_size);
+  if (image_shrunk && m_image_ctx.parent != NULL) {
+    m_image_ctx.parent_md.overlap = m_new_parent_overlap;
+  }
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/ResizeRequest.h b/src/librbd/operation/ResizeRequest.h
new file mode 100644 (file)
index 0000000..f7b9410
--- /dev/null
@@ -0,0 +1,104 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "include/xlist.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+/**
+ * Asynchronous request that resizes an image to a new size. Requests are
+ * queued per image (see m_xlist_item) and processed one at a time.
+ */
+class ResizeRequest : public AsyncRequest<>
+{
+public:
+  ResizeRequest(ImageCtx &image_ctx, Context *on_finish, uint64_t new_size,
+                     ProgressContext &prog_ctx);
+  virtual ~ResizeRequest();
+
+  virtual void send();
+
+  // true when the requested size is smaller than the original size
+  // recorded by send()
+  inline bool shrinking() const {
+    return m_new_size < m_original_size;
+  }
+
+  // the size this request resizes the image to
+  inline uint64_t get_image_size() const {
+    return m_new_size;
+  }
+
+private:
+  /**
+   * Resize goes through the following state machine to resize the image
+   * and update the object map:
+   *
+   * @verbatim
+   *
+   * <start> -------------> STATE_FINISHED -----------------------------\
+   *  |  .    (no change)                                               |
+   *  |  .                                                              |
+   *  |  . . . . . . . . . . . . . . . . . . . . .                      |
+   *  |                                          .                      |
+   *  |                                          v                      |
+   *  |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------|
+   *  | (grow)                                                          |
+   *  |                                                                 |
+   *  |                                                                 |
+   *  \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE    |
+   *    (shrink)                                 |                      |
+   *                                             |                      |
+   *                      /----------------------/                      |
+   *                      |                                             |
+   *                      v                                             |
+   *              STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . .  |
+   *                                             |                   .  |
+   *                                             |                   .  |
+   *                                             v                   v  v
+   *                                  STATE_SHRINK_OBJECT_MAP ---> <finish>
+   *
+   * @endverbatim
+   *
+   * The _OBJECT_MAP states are skipped if the object map isn't enabled.
+   * The state machine will immediately transition to _FINISHED if the
+   * requested size equals the current image size (no change).
+   */
+  enum State {
+    STATE_FLUSH,
+    STATE_INVALIDATE_CACHE,
+    STATE_TRIM_IMAGE,
+    STATE_GROW_OBJECT_MAP,
+    STATE_UPDATE_HEADER,
+    STATE_SHRINK_OBJECT_MAP,
+    STATE_FINISHED
+  };
+
+  State m_state;                  // not set by the constructor
+  uint64_t m_original_size;       // image size captured by send()
+  uint64_t m_new_size;            // requested image size
+  ProgressContext &m_prog_ctx;
+  uint64_t m_new_parent_overlap;  // computed by compute_parent_overlap()
+
+  // links this request into ImageCtx::resize_reqs
+  xlist<ResizeRequest *>::item m_xlist_item;
+
+  virtual bool should_complete(int r);
+
+  void send_flush();
+  void send_invalidate_cache();
+  void send_trim_image();
+  void send_grow_object_map();
+  bool send_shrink_object_map();
+  void send_update_header();
+
+  void compute_parent_overlap();
+  void update_size_and_overlap();
+
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_RESIZE_REQUEST_H
diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc
new file mode 100644 (file)
index 0000000..8d9546d
--- /dev/null
@@ -0,0 +1,358 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/operation/TrimRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AioObjectRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/ContextCompletion.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include <boost/bind.hpp>
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::TrimRequest: "
+
+namespace librbd {
+namespace operation {
+
+class C_CopyupObject : public C_AsyncObjectThrottle<> {
+public:
+  C_CopyupObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+                 ::SnapContext snapc, uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle, *image_ctx), m_snapc(snapc),
+      m_object_no(object_no)
+  {
+  }
+
+  virtual int send() {
+    assert(m_image_ctx.owner_lock.is_locked());
+    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+           m_image_ctx.image_watcher->is_lock_owner());
+
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    ldout(m_image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
+
+    AioObjectRequest *req = new AioObjectTrim(&m_image_ctx, oid, m_object_no,
+                                              m_snapc, this);
+    req->send();
+    return 0;
+  }
+private:
+  ::SnapContext m_snapc;
+  uint64_t m_object_no;
+};
+
+class C_RemoveObject : public C_AsyncObjectThrottle<> {
+public:
+  C_RemoveObject(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
+                 uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no)
+  {
+  }
+
+  virtual int send() {
+    assert(m_image_ctx.owner_lock.is_locked());
+    assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+           m_image_ctx.image_watcher->is_lock_owner());
+    if (!m_image_ctx.object_map.object_may_exist(m_object_no)) {
+      return 1;
+    }
+
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
+
+    librados::AioCompletion *rados_completion =
+      librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
+    int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
+    assert(r == 0);
+    rados_completion->release();
+    return 0;
+  }
+
+private:
+  uint64_t m_object_no;
+};
+
+TrimRequest::TrimRequest(ImageCtx &image_ctx, Context *on_finish,
+                         uint64_t original_size, uint64_t new_size,
+                         ProgressContext &prog_ctx)
+  : AsyncRequest(image_ctx, on_finish), m_new_size(new_size),
+    m_prog_ctx(prog_ctx)
+{
+  uint64_t period = m_image_ctx.get_stripe_period();
+  uint64_t new_num_periods = ((m_new_size + period - 1) / period);
+  m_delete_off = MIN(new_num_periods * period, original_size);
+  // first object we can delete free and clear
+  m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
+  m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " trim image " << original_size << " -> "
+                << m_new_size << " periods " << new_num_periods
+                 << " discard to offset " << m_delete_off
+                 << " delete objects " << m_delete_start
+                 << " to " << m_num_objects << dendl;
+}
+
+
+bool TrimRequest::should_complete(int r)
+{
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
+  if (r < 0) {
+    lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+
+  RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
+  switch (m_state) {
+  case STATE_COPYUP_OBJECTS:
+    ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
+    send_pre_remove();
+    break;
+
+  case STATE_PRE_REMOVE:
+    ldout(cct, 5) << " PRE_REMOVE" << dendl;
+    send_remove_objects();
+    break;
+
+  case STATE_REMOVE_OBJECTS:
+    ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
+    send_post_remove();
+    break;
+
+  case STATE_POST_REMOVE:
+    ldout(cct, 5) << " POST_OBJECTS" << dendl;
+    send_clean_boundary();
+    break;
+
+  case STATE_CLEAN_BOUNDARY:
+    ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
+    finish(0);
+    break;
+
+  case STATE_FINISHED:
+    ldout(cct, 5) << "FINISHED" << dendl;
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+// Entry point of the trim state machine: begins with the copyup step.
+void TrimRequest::send() {
+  send_copyup_objects();
+}
+
+void TrimRequest::send_copyup_objects() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+
+  if (m_delete_start >= m_num_objects) {
+    send_clean_boundary();
+    return;
+  }
+
+  ::SnapContext snapc;
+  bool has_snapshots;
+  uint64_t parent_overlap;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    RWLock::RLocker parent_locker(m_image_ctx.parent_lock);
+
+    snapc = m_image_ctx.snapc;
+    has_snapshots = !m_image_ctx.snaps.empty();
+    int r = m_image_ctx.get_parent_overlap(m_image_ctx.get_copyup_snap_id(),
+                                           &parent_overlap);
+    assert(r == 0);
+  }
+
+  // copyup is only required for portion of image that overlaps parent
+  uint64_t copyup_end = Striper::get_num_objects(m_image_ctx.layout,
+                                                 parent_overlap);
+  // TODO: protect against concurrent shrink and snap create?
+  if (copyup_end <= m_delete_start || !has_snapshots) {
+    send_pre_remove();
+    return;
+  }
+
+  uint64_t copyup_start = m_delete_start;
+  m_delete_start = copyup_end;
+
+  ldout(m_image_ctx.cct, 5) << this << " send_copyup_objects: "
+                           << " start object=" << copyup_start << ", "
+                           << " end object=" << copyup_end << dendl;
+  m_state = STATE_COPYUP_OBJECTS;
+
+  Context *ctx = create_callback_context();
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject>(),
+      boost::lambda::_1, &m_image_ctx, snapc, boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
+    copyup_end);
+  throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+void TrimRequest::send_remove_objects() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
+                           << " delete_start=" << m_delete_start
+                           << " num_objects=" << m_num_objects << dendl;
+  m_state = STATE_REMOVE_OBJECTS;
+
+  Context *ctx = create_callback_context();
+  AsyncObjectThrottle<>::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<C_RemoveObject>(),
+      boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
+  AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+    this, m_image_ctx, context_factory, ctx, &m_prog_ctx, m_delete_start,
+    m_num_objects);
+  throttle->start_ops(m_image_ctx.concurrent_management_ops);
+}
+
+void TrimRequest::send_pre_remove() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  if (m_delete_start >= m_num_objects) {
+    send_clean_boundary();
+    return;
+  }
+
+  bool remove_objects = false;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    if (!m_image_ctx.object_map.enabled()) {
+      remove_objects = true;
+    } else {
+      ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
+                               << " delete_start=" << m_delete_start
+                               << " num_objects=" << m_num_objects << dendl;
+      m_state = STATE_PRE_REMOVE;
+
+      assert(m_image_ctx.image_watcher->is_lock_owner());
+
+      // flag the objects as pending deletion
+      Context *ctx = create_callback_context();
+      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+      if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
+                                            OBJECT_PENDING, OBJECT_EXISTS,
+                                             ctx)) {
+        delete ctx;
+        remove_objects = true;
+      }
+    }
+  }
+
+  // avoid possible recursive lock attempts
+  if (remove_objects) {
+    // no object map update required
+    send_remove_objects();
+  }
+}
+
+void TrimRequest::send_post_remove() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  bool clean_boundary = false;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    if (!m_image_ctx.object_map.enabled()) {
+      clean_boundary = true;
+    } else {
+      ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
+                               << " delete_start=" << m_delete_start
+                               << " num_objects=" << m_num_objects << dendl;
+      m_state = STATE_POST_REMOVE;
+
+      assert(m_image_ctx.image_watcher->is_lock_owner());
+
+      // flag the pending objects as removed
+      Context *ctx = create_callback_context();
+      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
+      if (!m_image_ctx.object_map.aio_update(m_delete_start, m_num_objects,
+                                            OBJECT_NONEXISTENT,
+                                            OBJECT_PENDING, ctx)) {
+        delete ctx;
+       clean_boundary = true;
+      }
+    }
+  }
+
+  // avoid possible recursive lock attempts
+  if (clean_boundary) {
+    // no object map update required
+    send_clean_boundary();
+  }
+}
+
+void TrimRequest::send_clean_boundary() {
+  assert(m_image_ctx.owner_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+  if (m_delete_off <= m_new_size) {
+    finish(0);
+    return;
+  }
+
+  // should have been canceled prior to releasing lock
+  assert(!m_image_ctx.image_watcher->is_lock_supported() ||
+         m_image_ctx.image_watcher->is_lock_owner());
+  uint64_t delete_len = m_delete_off - m_new_size;
+  ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
+                           << " delete_off=" << m_delete_off
+                           << " length=" << delete_len << dendl;
+  m_state = STATE_CLEAN_BOUNDARY;
+
+  ::SnapContext snapc;
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    snapc = m_image_ctx.snapc;
+  }
+
+  // discard the weird boundary
+  std::vector<ObjectExtent> extents;
+  Striper::file_to_extents(cct, m_image_ctx.format_string,
+                          &m_image_ctx.layout, m_new_size, delete_len, 0,
+                           extents);
+
+  ContextCompletion *completion =
+    new ContextCompletion(create_callback_context(), true);
+  for (vector<ObjectExtent>::iterator p = extents.begin();
+       p != extents.end(); ++p) {
+    ldout(cct, 20) << " ex " << *p << dendl;
+    Context *req_comp = new C_ContextCompletion(*completion);
+
+    AioObjectRequest *req;
+    if (p->offset == 0) {
+      req = new AioObjectTrim(&m_image_ctx, p->oid.name, p->objectno, snapc,
+                              req_comp);
+    } else {
+      req = new AioObjectTruncate(&m_image_ctx, p->oid.name, p->objectno,
+                                  p->offset, snapc, req_comp);
+    }
+    req->send();
+  }
+  completion->finish_adding_requests();
+}
+
+void TrimRequest::finish(int r) {
+  m_state = STATE_FINISHED;  // should_complete() returns true in this state
+  async_complete(r);         // pass r on (async_complete is defined elsewhere)
+}
+
+} // namespace operation
+} // namespace librbd
diff --git a/src/librbd/operation/TrimRequest.h b/src/librbd/operation/TrimRequest.h
new file mode 100644 (file)
index 0000000..d781c0c
--- /dev/null
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+#define CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+namespace operation {
+
+class TrimRequest : public AsyncRequest<>
+{
+public:
+  TrimRequest(ImageCtx &image_ctx, Context *on_finish,
+                  uint64_t original_size, uint64_t new_size,
+                  ProgressContext &prog_ctx);
+
+  virtual void send();
+
+protected:
+  /**
+   * Trim goes through the following state machine to remove whole objects,
+   * clean partially trimmed objects, and update the object map:
+   *
+   * @verbatim
+   *
+   *     <start> . . . . > STATE_FINISHED . . . . . . . . .
+   *      |   .                                           .
+   *      |   . . . . . . . . . . . .                     .
+   *      |                         .                     .
+   *      v                         .                     .
+   * STATE_COPYUP_OBJECTS . . .     .                     .
+   *      |                   .     .                     .
+   *      |                   .     .                     .
+   *      v                   v     v                     .
+   * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS           .
+   *                                |   .   .             .
+   *        /-----------------------/   .   . . . . . .   .
+   *        |                           .             .   .
+   *        v                           v             v   v
+   * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
+   *        .                                           ^
+   *        .                                           .
+   *        . . . . . . . . . . . . . . . . . . . . . . .
+   *
+   * @endverbatim
+   *
+   * The _COPYUP_OBJECTS state is skipped if the parent overlap does not
+   * reach the objects being removed or the image does not have any
+   * snapshots (either condition alone is sufficient).
+   * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+   * isn't enabled or the object map update call reports that no update
+   * was issued. The _REMOVE_OBJECTS state is skipped if no whole objects
+   * are removed.  The _CLEAN_BOUNDARY state is skipped if no boundary
+   * objects are cleaned.  The state machine will immediately transition
+   * to _FINISHED state if there are no bytes to trim.
+   */
+
+  enum State {
+    STATE_COPYUP_OBJECTS,
+    STATE_PRE_REMOVE,
+    STATE_REMOVE_OBJECTS,
+    STATE_POST_REMOVE,
+    STATE_CLEAN_BOUNDARY,
+    STATE_FINISHED
+  };
+
+  virtual bool should_complete(int r);
+
+  State m_state;  // current stage; selects the next step in should_complete()
+
+private:
+  uint64_t m_delete_start;  // first object of the range still to be processed
+  uint64_t m_num_objects;   // end of the object range passed to the throttles
+  uint64_t m_delete_off;    // boundary discard length is m_delete_off - m_new_size
+  uint64_t m_new_size;      // byte offset where the boundary discard begins
+  ProgressContext &m_prog_ctx;  // handed to each AsyncObjectThrottle
+
+  void send_copyup_objects();
+  void send_remove_objects();
+  void send_pre_remove();
+  void send_post_remove();
+  void send_clean_boundary();
+  void finish(int r);
+};
+
+} // namespace operation
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_OPERATION_TRIM_REQUEST_H