]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: object map updates should use AIO 2700/head
authorJason Dillaman <dillaman@redhat.com>
Tue, 27 Jan 2015 11:07:13 +0000 (06:07 -0500)
committerJason Dillaman <dillaman@redhat.com>
Thu, 29 Jan 2015 02:12:53 +0000 (21:12 -0500)
Update the existing AbstractWrite state machine to handle
updating the object map before and after the object update.
Also convert the resize/trim maintenance operations to use
new state machines which handle the pre-/post-operation
updates to the object map.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
26 files changed:
src/cls/rbd/cls_rbd_client.cc
src/cls/rbd/cls_rbd_client.h
src/include/Context.h
src/librbd/AioRequest.cc
src/librbd/AioRequest.h
src/librbd/AsyncFlattenRequest.cc [new file with mode: 0644]
src/librbd/AsyncFlattenRequest.h [new file with mode: 0644]
src/librbd/AsyncObjectThrottle.cc
src/librbd/AsyncObjectThrottle.h
src/librbd/AsyncRequest.cc [new file with mode: 0644]
src/librbd/AsyncRequest.h [new file with mode: 0644]
src/librbd/AsyncResizeRequest.cc [new file with mode: 0644]
src/librbd/AsyncResizeRequest.h [new file with mode: 0644]
src/librbd/AsyncTrimRequest.cc [new file with mode: 0644]
src/librbd/AsyncTrimRequest.h [new file with mode: 0644]
src/librbd/CopyupRequest.cc
src/librbd/CopyupRequest.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/Makefile.am
src/librbd/ObjectMap.cc
src/librbd/ObjectMap.h
src/librbd/internal.cc
src/librbd/internal.h
src/osdc/ObjectCacher.h
src/test/cls_rbd/test_cls_rbd.cc

index 7b144ffe7ea75815077b211bdcc6946dd2df8435..faef6f3ceb35e3a9f3cba4313f89a60c5e5b1996 100644 (file)
@@ -265,15 +265,13 @@ namespace librbd {
       return 0;
     }
 
-    int set_flags(librados::IoCtx *ioctx, const std::string &oid,
-                  uint64_t flags, uint64_t mask)
+    void set_flags(librados::ObjectWriteOperation *op, uint64_t flags,
+                  uint64_t mask)
     {
       bufferlist inbl;
       ::encode(flags, inbl);
       ::encode(mask, inbl);
-
-      bufferlist outbl;
-      return ioctx->exec(oid, "rbd", "set_flags", inbl, outbl);
+      op->exec("rbd", "set_flags", inbl);
     }
 
     int remove_parent(librados::IoCtx *ioctx, const std::string &oid)
@@ -301,16 +299,23 @@ namespace librbd {
       return ioctx->exec(oid, "rbd", "add_child", in, out);
     }
 
-    int remove_child(librados::IoCtx *ioctx, const std::string &oid,
-                    parent_spec pspec, const std::string &c_imageid)
+    void remove_child(librados::ObjectWriteOperation *op,
+                     parent_spec pspec, const std::string &c_imageid)
     {
-      bufferlist in, out;
+      bufferlist in;
       ::encode(pspec.pool_id, in);
       ::encode(pspec.image_id, in);
       ::encode(pspec.snap_id, in);
       ::encode(c_imageid, in);
+      op->exec("rbd", "remove_child", in);
+    }
 
-      return ioctx->exec(oid, "rbd", "remove_child", in, out);
+    int remove_child(librados::IoCtx *ioctx, const std::string &oid,
+                    parent_spec pspec, const std::string &c_imageid)
+    {
+      librados::ObjectWriteOperation op;
+      remove_child(&op, pspec, c_imageid);
+      return ioctx->operate(oid, &op);
     }
 
     int get_children(librados::IoCtx *ioctx, const std::string &oid,
index d78175fb67cf8930412556f66f50563e9a93e1be..0e21da33a6fa370ced04df3bf3727ecfccb1c1a2 100644 (file)
@@ -49,12 +49,14 @@ namespace librbd {
                   parent_spec pspec, uint64_t parent_overlap);
     int get_flags(librados::IoCtx *ioctx, const std::string &oid,
                   snapid_t snap_id, uint64_t *flags);
-    int set_flags(librados::IoCtx *ioctx, const std::string &oid,
-                  uint64_t flags, uint64_t mask);
+    void set_flags(librados::ObjectWriteOperation *op,
+                   uint64_t flags, uint64_t mask);
     int remove_parent(librados::IoCtx *ioctx, const std::string &oid);
     void remove_parent(librados::ObjectWriteOperation *op);
     int add_child(librados::IoCtx *ioctx, const std::string &oid,
                  parent_spec pspec, const std::string &c_imageid);
+    void remove_child(librados::ObjectWriteOperation *op,
+                     parent_spec pspec, const std::string &c_imageid);
     int remove_child(librados::IoCtx *ioctx, const std::string &oid,
                     parent_spec pspec, const std::string &c_imageid);
     int get_children(librados::IoCtx *ioctx, const std::string &oid,
index 0b91e70c3d728fe21c5b2ccd1ff85ed57a1372ba..2ae92214b724224add05f084e6ddb6de923c816f 100644 (file)
@@ -452,16 +452,16 @@ typedef C_GatherBuilderBase<Context, C_Gather > C_GatherBuilder;
 
 class FunctionContext : public Context {
 public:
-  FunctionContext(const boost::function<void()> &callback)
+  FunctionContext(const boost::function<void(int)> &callback)
     : m_callback(callback)
   {
   }
 
   virtual void finish(int r) {
-    m_callback();
+    m_callback(r);
   }
 private:
-  boost::function<void()> m_callback;
+  boost::function<void(int)> m_callback;
 };
 
 #undef mydout
index 2efde981d6274c7ce4dcc3a0ab29958009f102ed..907367a192e1d366067683d98b65ad7f81a32710 100644 (file)
@@ -3,16 +3,20 @@
 
 #include "common/ceph_context.h"
 #include "common/dout.h"
+#include "common/errno.h"
 #include "common/Mutex.h"
 #include "common/RWLock.h"
 
 #include "librbd/AioCompletion.h"
 #include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 
 #include "librbd/AioRequest.h"
 #include "librbd/CopyupRequest.h"
-#include "librbd/ObjectMap.h"
+
+#include <boost/bind.hpp>
+#include <boost/optional.hpp>
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -21,7 +25,7 @@
 namespace librbd {
 
   AioRequest::AioRequest() :
-    m_ictx(NULL), m_ioctx(NULL),
+    m_ictx(NULL),
     m_object_no(0), m_object_off(0), m_object_len(0),
     m_snap_id(CEPH_NOSNAP), m_completion(NULL), m_parent_completion(NULL),
     m_hide_enoent(false) {}
@@ -30,7 +34,7 @@ namespace librbd {
                         const ::SnapContext &snapc, librados::snap_t snap_id,
                         Context *completion,
                         bool hide_enoent) :
-    m_ictx(ictx), m_ioctx(&ictx->data_ctx), m_oid(oid), m_object_no(objectno),
+    m_ictx(ictx), m_oid(oid), m_object_no(objectno),
     m_object_off(off), m_object_len(len), m_snap_id(snap_id),
     m_completion(completion), m_parent_completion(NULL),
     m_hide_enoent(hide_enoent) {
@@ -176,7 +180,7 @@ namespace librbd {
                                                        m_object_no,
                                                       m_image_extents);
             m_ictx->copyup_list[m_object_no] = new_req;
-            new_req->queue_read_from_parent();
+            new_req->queue_send();
           }
         }
       }
@@ -219,8 +223,7 @@ namespace librbd {
     }
     op.set_op_flags2(m_op_flags);
 
-    r = m_ioctx->aio_operate(m_oid, rados_completion, &op, flags, NULL);
-
+    r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags, NULL);
     rados_completion->release();
     return r;
   }
@@ -240,8 +243,12 @@ namespace librbd {
                               bool hide_enoent)
     : AioRequest(ictx, oid, object_no, object_off, len, snapc, snap_id, 
                  completion, hide_enoent),
-      m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val), m_entire_object(NULL)
+      m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val),
+      m_entire_object(NULL)
   {
+    m_io_ctx.dup(ictx->data_ctx);
+    m_io_ctx.snap_set_read(CEPH_NOSNAP);
+
     m_object_image_extents = objectx;
     m_parent_overlap = object_overlap;
   }
@@ -252,6 +259,8 @@ namespace librbd {
       m_state = LIBRBD_AIO_WRITE_GUARD;
       m_write.assert_exists();
       ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
+    } else {
+      m_state = LIBRBD_AIO_WRITE_FLAT;
     }
   }
 
@@ -263,6 +272,21 @@ namespace librbd {
     map<uint64_t, CopyupRequest*>::iterator it;
     bool finished = true;
     switch (m_state) {
+    case LIBRBD_AIO_WRITE_PRE:
+      ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl;
+      if (r < 0) {
+       return true;
+      }
+
+      send_write();
+      finished = false;
+      break;
+
+    case LIBRBD_AIO_WRITE_POST:
+      ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl;
+      finished = true;
+      break;
+
     case LIBRBD_AIO_WRITE_GUARD:
       ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
 
@@ -314,7 +338,7 @@ namespace librbd {
 
               m_entire_object = &(new_req->get_copyup_data());
               m_ictx->copyup_list_lock.Unlock();
-              new_req->read_from_parent();
+              new_req->send();
             } else {
               it->second->append_request(this);
               m_entire_object = &it->second->get_copyup_data();
@@ -332,25 +356,27 @@ namespace librbd {
        }
        finished = false;
        break;
-      }
-      if (r < 0) {
-       ldout(m_ictx->cct, 20) << "error checking for object existence" << dendl;
+      } else if (r < 0) {
+        // pass the error code to the finish context
+        m_state = LIBRBD_AIO_WRITE_ERROR;
+        complete(r);
+       finished = false;
        break;
       }
+
+      finished = send_post();
       break;
 
     case LIBRBD_AIO_WRITE_COPYUP:
       ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
       m_state = LIBRBD_AIO_WRITE_GUARD;
-      if (r < 0)
+      if (r < 0) {
        return should_complete(r);
+      }
 
       // Read data from waiting list safely. If this AioWrite created a
       // CopyupRequest, m_read_data should be empty.
       if (m_entire_object != NULL) {
-       assert(m_ictx->copyup_list_lock.is_locked());
-       assert(m_ictx->copyup_list.find(m_object_no) !=
-              m_ictx->copyup_list.end());
        assert(m_read_data.length() == 0);
        m_read_data.append(*m_entire_object);
       }
@@ -361,7 +387,14 @@ namespace librbd {
 
     case LIBRBD_AIO_WRITE_FLAT:
       ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
-      // nothing to do
+
+      finished = send_post();
+      break;
+
+    case LIBRBD_AIO_WRITE_ERROR:
+      assert(r < 0);
+      lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r)
+                        << dendl; 
       break;
 
     default:
@@ -373,32 +406,111 @@ namespace librbd {
   }
 
   int AbstractWrite::send() {
-    ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl;
+    ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " "
+                          << m_object_off << "~" << m_object_len << dendl;
+
+    if (send_pre()) {
+      return 0;
+    } else {
+      send_write();
+    }
+    return 0;
+  }
+
+  bool AbstractWrite::send_pre() {
+    bool lost_exclusive_lock = false;
+    {
+      RWLock::RLocker l(m_ictx->owner_lock);
+      RWLock::RLocker l2(m_ictx->md_lock);
+      if (m_ictx->object_map == NULL) {
+       return false;
+      }
+
+      if (!m_ictx->image_watcher->is_lock_owner()) {
+       ldout(m_ictx->cct, 1) << "lost exclusive lock during write" << dendl;
+       lost_exclusive_lock = true;
+      } else {
+       ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
+                              << m_object_off << "~" << m_object_len << dendl;
+
+        uint8_t new_state;
+        boost::optional<uint8_t> current_state;
+        pre_object_map_update(&new_state);
+
+        m_state = LIBRBD_AIO_WRITE_PRE; 
+        FunctionContext *ctx = new FunctionContext(
+          boost::bind(&AioRequest::complete, this, _1));
+        m_ictx->object_map->aio_update(m_object_no, new_state,
+                                      current_state, ctx);
+      }
+    }
+    
+    if (lost_exclusive_lock) {
+      complete(-ERESTART);
+    }
+    return true;
+  }
+
+  bool AbstractWrite::send_post() {
+    ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
+                          << m_object_off << "~" << m_object_len << dendl;
+
+    RWLock::RLocker l(m_ictx->owner_lock);
+    RWLock::RLocker l2(m_ictx->md_lock);
+    if (m_ictx->object_map == NULL || !post_object_map_update()) {
+      return true;
+    }
+
+    if (m_ictx->image_watcher->is_lock_supported() &&
+        !m_ictx->image_watcher->is_lock_owner()) {
+      // leave the object flagged as pending
+      ldout(m_ictx->cct, 1) << "lost exclusive lock during write" << dendl;
+      return true;
+    }
+
+    m_state = LIBRBD_AIO_WRITE_POST;
+    FunctionContext *ctx = new FunctionContext(
+      boost::bind(&AioRequest::complete, this, _1));
+    m_ictx->object_map->aio_update(m_object_no, OBJECT_NONEXISTENT,
+                                   OBJECT_PENDING, ctx);
+    return false;  
+  } 
+
+  void AbstractWrite::send_write() {
+    ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+                          << m_object_off << "~" << m_object_len << dendl;
+
+    guard_write();
+    add_write_ops(&m_write);
+    assert(m_write.size() != 0);
+
     librados::AioCompletion *rados_completion =
       librados::Rados::aio_create_completion(this, NULL, rados_req_cb);
-    int r;
-    assert(m_write.size());
-    r = m_ioctx->aio_operate(m_oid, rados_completion, &m_write,
-                            m_snap_seq, m_snaps);
+    int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write,
+                                        m_snap_seq, m_snaps);
+    assert(r == 0);
     rados_completion->release();
-    return r;
   }
 
   void AbstractWrite::send_copyup() {
     ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len << dendl;
-    if (!m_read_data.is_zero())
-      m_copyup.exec("rbd", "copyup", m_read_data);
-    add_copyup_ops();
+    librados::ObjectWriteOperation op;
+    if (!m_read_data.is_zero()) {
+      op.exec("rbd", "copyup", m_read_data);
+    }
+    add_write_ops(&op);
+    assert(op.size() != 0);
 
     librados::AioCompletion *rados_completion =
       librados::Rados::aio_create_completion(this, NULL, rados_req_cb);
-    m_ictx->md_ctx.aio_operate(m_oid, rados_completion, &m_copyup,
+    m_ictx->md_ctx.aio_operate(m_oid, rados_completion, &op,
                               m_snap_seq, m_snaps);
     rados_completion->release();
   }
 
-  void AioWrite::add_write_ops(librados::ObjectWriteOperation &wr) {
-    wr.set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
-    wr.write(m_object_off, m_write_data);
+  void AioWrite::add_write_ops(librados::ObjectWriteOperation *wr) {
+    wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
+    wr->write(m_object_off, m_write_data);
+    wr->set_op_flags2(m_op_flags);
   }
 }
index 4e29feb82aa6a699edcdb46de25fb672a2036877..4ffc7491838f42632f7dc46b2335067a6a3c3cb5 100644 (file)
@@ -11,6 +11,7 @@
 #include "include/buffer.h"
 #include "include/Context.h"
 #include "include/rados/librados.hpp"
+#include "librbd/ObjectMap.h"
 
 namespace librbd {
 
@@ -50,7 +51,6 @@ namespace librbd {
     void read_from_parent(vector<pair<uint64_t,uint64_t> >& image_extents);
 
     ImageCtx *m_ictx;
-    librados::IoCtx *m_ioctx;
     std::string m_oid;
     uint64_t m_object_no, m_object_off, m_object_len;
     librados::snap_t m_snap_id;
@@ -125,7 +125,6 @@ namespace librbd {
     virtual ~AbstractWrite() {}
     virtual bool should_complete(int r);
     virtual int send();
-    void guard_write();
 
     bool has_parent() const {
       return !m_object_image_extents.empty();
@@ -134,38 +133,65 @@ namespace librbd {
   private:
     /**
      * Writes go through the following state machine to deal with
-     * layering:
+     * layering and the object map:
      *
-     *                           need copyup
-     * LIBRBD_AIO_WRITE_GUARD ---------------> LIBRBD_AIO_WRITE_COPYUP
-     *           |        ^                              |
-     *           v        \------------------------------/
-     *         done
-     *           ^
-     *           |
-     * LIBRBD_AIO_WRITE_FLAT
+     * <start>
+     *  .  |
+     *  .  |
+     *  .  \---> LIBRBD_AIO_WRITE_PRE
+     *  .           |         |
+     *  . . . . . . | . . . . | . . . . . . . . . . . 
+     *      .       |   -or-  |                     .
+     *      .       |         |                     v
+     *      .       |         \----------------> LIBRBD_AIO_WRITE_FLAT . . .
+     *      .       |                                               |      .
+     *      v       v         need copyup                           |      .
+     * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP  |      .
+     *  .       |   ^                           |                   |      .
+     *  .       |   |                           |                   |      .
+     *  .       |   \---------------------------/                   |      .
+     *  .       |                                                   |      .
+     *  .       \-------------------\           /-------------------/      .
+     *  .                           |           |                          .
+     *  .                       LIBRBD_AIO_WRITE_POST                      .
+     *  .                                |                                 .
+     *  .                                v                                 .
+     *  . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . . 
      *
-     * Writes start in LIBRBD_AIO_WRITE_GUARD or _FLAT, depending on whether
-     * there is a parent or not.
+     * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+     * is disabled.  The write starts in _WRITE_GUARD or _FLAT depending on
+     * whether or not there is a parent overlap.
      */
     enum write_state_d {
       LIBRBD_AIO_WRITE_GUARD,
       LIBRBD_AIO_WRITE_COPYUP,
-      LIBRBD_AIO_WRITE_FLAT
+      LIBRBD_AIO_WRITE_FLAT,
+      LIBRBD_AIO_WRITE_PRE,
+      LIBRBD_AIO_WRITE_POST,
+      LIBRBD_AIO_WRITE_ERROR
     };
 
   protected:
-    virtual void add_copyup_ops() = 0;
-
     write_state_d m_state;
     vector<pair<uint64_t,uint64_t> > m_object_image_extents;
     uint64_t m_parent_overlap;
     librados::ObjectWriteOperation m_write;
-    librados::ObjectWriteOperation m_copyup;
     uint64_t m_snap_seq;
     ceph::bufferlist *m_entire_object;
 
+    virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
+    virtual void guard_write();
+    virtual void pre_object_map_update(uint8_t *new_state) = 0;
+    virtual bool post_object_map_update() {
+      return false;
+    }
+
   private:
+    librados::IoCtx m_io_ctx;
+
+    bool send_pre();
+    bool send_post();
+    void send_write();
     void send_copyup();
   };
 
@@ -182,23 +208,22 @@ namespace librbd {
                      objectx, object_overlap,
                      snapc, snap_id,
                      completion, false),
-       m_write_data(data) {
-      guard_write();
-      add_write_ops(m_write);
+       m_write_data(data), m_op_flags(0) {
     }
     virtual ~AioWrite() {}
 
     void set_op_flags(int op_flags) {
-      m_write.set_op_flags2(op_flags);
+      m_op_flags = op_flags;
     }
   protected:
-    virtual void add_copyup_ops() {
-      add_write_ops(m_copyup);
+    virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+    virtual void pre_object_map_update(uint8_t *new_state) {
+      *new_state = OBJECT_EXISTS;
     }
 
   private:
-    void add_write_ops(librados::ObjectWriteOperation &wr);
     ceph::bufferlist m_write_data;
+    int m_op_flags;
   };
 
   class AioRemove : public AbstractWrite {
@@ -213,18 +238,37 @@ namespace librbd {
                      objectx, object_overlap,
                      snapc, snap_id, completion,
                      true) {
-      if (has_parent())
-       m_write.truncate(0);
-      else
-       m_write.remove();
     }
     virtual ~AioRemove() {}
 
   protected:
-    virtual void add_copyup_ops() {
-      // removing an object never needs to copyup
-      assert(0);
+    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+      if (has_parent()) {
+       m_object_state = OBJECT_EXISTS;
+       wr->truncate(0);
+      } else {
+       m_object_state = OBJECT_PENDING;
+       wr->remove();
+      }
+    }
+
+    virtual void pre_object_map_update(uint8_t *new_state) {
+      *new_state = m_object_state;
     }
+
+    virtual bool post_object_map_update() {
+      if (m_object_state == OBJECT_EXISTS) {
+       return false;
+      }
+      return true;
+    }
+
+    virtual void guard_write() {
+      // do nothing to disable write guard
+    }
+
+  private:
+    uint8_t m_object_state;
   };
 
   class AioTruncate : public AbstractWrite {
@@ -239,14 +283,16 @@ namespace librbd {
                      objectx, object_overlap,
                      snapc, snap_id, completion,
                      true) {
-      guard_write();
-      m_write.truncate(object_off);
     }
     virtual ~AioTruncate() {}
 
   protected:
-    virtual void add_copyup_ops() {
-      m_copyup.truncate(m_object_off);
+    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+      wr->truncate(m_object_off);
+    }
+
+    virtual void pre_object_map_update(uint8_t *new_state) {
+      *new_state = OBJECT_EXISTS;
     }
   };
 
@@ -262,14 +308,16 @@ namespace librbd {
                      objectx, object_overlap,
                      snapc, snap_id, completion,
                      true) {
-      guard_write();
-      m_write.zero(object_off, object_len);
     }
     virtual ~AioZero() {}
 
   protected:
-    virtual void add_copyup_ops() {
-      m_copyup.zero(m_object_off, m_object_len);
+    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+      wr->zero(m_object_off, m_object_len);
+    }
+
+    virtual void pre_object_map_update(uint8_t *new_state) {
+      *new_state = OBJECT_EXISTS;
     }
   };
 
diff --git a/src/librbd/AsyncFlattenRequest.cc b/src/librbd/AsyncFlattenRequest.cc
new file mode 100644 (file)
index 0000000..eacdc4b
--- /dev/null
@@ -0,0 +1,210 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/AsyncFlattenRequest.h"
+#include "librbd/AioRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/ObjectMap.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include <boost/lambda/bind.hpp> 
+#include <boost/lambda/construct.hpp>  
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix 
+#define dout_prefix *_dout << "librbd::AsyncFlattenRequest: "
+
+namespace librbd {
+
+class AsyncFlattenObjectContext : public C_AsyncObjectThrottle {
+public:
+  AsyncFlattenObjectContext(AsyncObjectThrottle &throttle, ImageCtx *image_ctx,
+                            uint64_t object_size, ::SnapContext snapc,
+                            uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle), m_image_ctx(*image_ctx),
+      m_object_size(object_size), m_snapc(snapc), m_object_no(object_no)
+  {
+  }
+
+  virtual int send() {
+    CephContext *cct = m_image_ctx.cct;
+
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+        !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(cct, 1) << "lost exclusive lock during flatten" << dendl;
+      return -ERESTART;
+    }
+
+    RWLock::RLocker l2(m_image_ctx.md_lock);
+    uint64_t overlap;
+    {
+      RWLock::RLocker l3(m_image_ctx.parent_lock);
+      // stop early if the parent went away - it just means
+      // another flatten finished first, so this one is useless.
+      if (!m_image_ctx.parent) {
+        return 1;
+      }
+
+      // resize might have occurred while flatten is running
+      overlap = min(m_image_ctx.size, m_image_ctx.parent_md.overlap);
+    }
+
+    // map child object onto the parent
+    vector<pair<uint64_t,uint64_t> > objectx;
+    Striper::extent_to_file(cct, &m_image_ctx.layout, m_object_no,
+                           0, m_object_size, objectx);
+    uint64_t object_overlap = m_image_ctx.prune_parent_extents(objectx, overlap);
+    assert(object_overlap <= m_object_size);
+    if (object_overlap == 0) {
+      // resize shrunk image while flattening
+      return 1;
+    }
+
+    bufferlist bl;
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    AioWrite *req = new AioWrite(&m_image_ctx, oid, m_object_no, 0, objectx,
+                                 object_overlap, bl, m_snapc, CEPH_NOSNAP,
+                                 this);
+    int r = req->send();
+    assert(r == 0);
+    return 0;
+  }
+
+private:
+  ImageCtx &m_image_ctx;
+  uint64_t m_object_size;
+  ::SnapContext m_snapc;
+  uint64_t m_object_no;
+};
+
+bool AsyncFlattenRequest::should_complete(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+  if (r < 0 && !(r == -ENOENT && m_ignore_enoent) ) {
+    lderr(cct) << "flatten encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+
+  switch (m_state) {
+  case STATE_FLATTEN_OBJECTS:
+    ldout(cct, 5) << "FLATTEN_OBJECTS" << dendl;
+    return send_update_header();
+
+  case STATE_UPDATE_HEADER:
+    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+    return send_update_children();
+
+  case STATE_UPDATE_CHILDREN:
+    ldout(cct, 5) << "UPDATE_CHILDREN" << dendl;
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+void AsyncFlattenRequest::send() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " send" << dendl;
+
+  m_state = STATE_FLATTEN_OBJECTS;
+  AsyncObjectThrottle::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<AsyncFlattenObjectContext>(),
+      boost::lambda::_1, &m_image_ctx, m_object_size, m_snapc,
+      boost::lambda::_2));
+  AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
+    context_factory, create_callback_context(), m_prog_ctx, 0,
+    m_overlap_objects);
+  throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+bool AsyncFlattenRequest::send_update_header() {
+  CephContext *cct = m_image_ctx.cct;
+  bool lost_exclusive_lock = false;
+
+  m_state = STATE_UPDATE_HEADER;
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+       !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(cct, 1) << "lost exclusive lock during header update" << dendl;
+      lost_exclusive_lock = true;
+    } else {
+      ldout(cct, 5) << this << " send_update_header" << dendl;
+
+      RWLock::RLocker l2(m_image_ctx.parent_lock);
+      // stop early if the parent went away - it just means
+      // another flatten finished first, so this one is useless.
+      if (!m_image_ctx.parent) {
+       ldout(cct, 5) << "image already flattened" << dendl; 
+        return true;
+      }
+      m_ignore_enoent = true;
+      m_parent_spec = m_image_ctx.parent_md.spec;
+
+      // remove parent from this (base) image
+      librados::ObjectWriteOperation op;
+      if (m_image_ctx.image_watcher->is_lock_supported()) {
+        m_image_ctx.image_watcher->assert_header_locked(&op);
+      }
+      cls_client::remove_parent(&op);
+
+      librados::AioCompletion *rados_completion = create_callback_completion();
+      int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                        rados_completion, &op);
+      assert(r == 0);
+      rados_completion->release();
+    }
+  }
+
+  if (lost_exclusive_lock) {
+    complete(-ERESTART);
+  }
+  return false;
+}
+
+bool AsyncFlattenRequest::send_update_children() {
+  CephContext *cct = m_image_ctx.cct;
+  bool lost_exclusive_lock = false;
+
+  m_state = STATE_UPDATE_CHILDREN;
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+        !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(cct, 1) << "lost exclusive lock during children update" << dendl;
+      lost_exclusive_lock = true;
+    } else {
+      // if there are no snaps, remove from the children object as well
+      // (if snapshots remain, they have their own parent info, and the child
+      // will be removed when the last snap goes away)
+      RWLock::RLocker l2(m_image_ctx.snap_lock);
+      if (!m_image_ctx.snaps.empty()) {
+        return true;
+      }
+
+      ldout(cct, 2) << "removing child from children list..." << dendl;
+      librados::ObjectWriteOperation op;
+      cls_client::remove_child(&op, m_parent_spec, m_image_ctx.id);
+
+      librados::AioCompletion *rados_completion = create_callback_completion();
+      int r = m_image_ctx.md_ctx.aio_operate(RBD_CHILDREN, rados_completion,
+                                            &op);
+      assert(r == 0);
+      rados_completion->release();
+    }
+  }  
+
+  if (lost_exclusive_lock) {
+    complete(-ERESTART);
+  }
+  return false;
+}
+
+} // namespace librbd
diff --git a/src/librbd/AsyncFlattenRequest.h b/src/librbd/AsyncFlattenRequest.h
new file mode 100644 (file)
index 0000000..3f0c612
--- /dev/null
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_FLATTEN_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_FLATTEN_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+#include "librbd/parent_types.h"
+#include "common/snap_types.h"
+
+namespace librbd {
+
+class ImageCtx;
+class ProgressContext;
+
+class AsyncFlattenRequest : public AsyncRequest
+{
+public:
+  AsyncFlattenRequest(ImageCtx &image_ctx, Context *on_finish,
+                     uint64_t object_size, uint64_t overlap_objects,
+                     const ::SnapContext &snapc, ProgressContext &prog_ctx)
+    : AsyncRequest(image_ctx, on_finish), m_object_size(object_size),
+      m_overlap_objects(overlap_objects), m_snapc(snapc), m_prog_ctx(prog_ctx),
+      m_ignore_enoent(false)
+  {
+  }
+
+  virtual void send();
+
+protected:
+  virtual bool should_complete(int r);
+
+private:
+  /**
+   * Flatten goes through the following state machine to copyup objects
+   * from the parent image:
+   *
+   * <start>
+   *    |
+   *    v
+   * STATE_FLATTEN_OBJECTS ---> STATE_UPDATE_HEADER . . . . .
+   *           .                         |                  .
+   *           .                         |                  .
+   *           .                         v                  .
+   *           .               STATE_UPDATE_CHILDREN        .
+   *           .                         |                  .
+   *           .                         |                  .
+   *           .                         \---> <finish> < . .
+   *           .                                   ^
+   *           .                                   .
+   *           . . . . . . . . . . . . . . . . . . .
+   *
+   * The _UPDATE_CHILDREN state will be skipped if the image has one or
+   * more snapshots. The _UPDATE_HEADER state will be skipped if the
+   * image was concurrently flattened by another client.
+   */
+  enum State {
+    STATE_FLATTEN_OBJECTS,
+    STATE_UPDATE_HEADER,
+    STATE_UPDATE_CHILDREN
+  };
+
+  uint64_t m_object_size;
+  uint64_t m_overlap_objects;
+  ::SnapContext m_snapc;
+  ProgressContext &m_prog_ctx;
+  State m_state;
+
+  parent_spec m_parent_spec;
+  bool m_ignore_enoent;
+
+  bool send_update_header();
+  bool send_update_children();
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_ASYNC_FLATTEN_REQUEST_H
index c0ab54edcf393b0c1dd8d3efdfca7c6bd373bb1e..28a6a348d1ea1b11d894c840412bfbd099819f47 100644 (file)
@@ -17,14 +17,14 @@ AsyncObjectThrottle::AsyncObjectThrottle(const ContextFactory& context_factory,
 {
 }
 
-int AsyncObjectThrottle::start_ops(uint64_t max_concurrent) {
+void AsyncObjectThrottle::start_ops(uint64_t max_concurrent) {
   bool complete;
   {
     Mutex::Locker l(m_lock);
     for (uint64_t i = 0; i < max_concurrent; ++i) {
-      int r = start_next_op();
-      if (r < 0 && m_current_ops == 0) {
-        return r;
+      start_next_op();
+      if (m_ret < 0 && m_current_ops == 0) {
+       break;
       }
     }
     complete = (m_current_ops == 0);
@@ -33,7 +33,6 @@ int AsyncObjectThrottle::start_ops(uint64_t max_concurrent) {
     m_ctx->complete(m_ret);
     delete this;
   }
-  return 0;
 }
 
 void AsyncObjectThrottle::finish_op(int r) {
@@ -54,11 +53,11 @@ void AsyncObjectThrottle::finish_op(int r) {
   }
 }
 
-int AsyncObjectThrottle::start_next_op() {
+void AsyncObjectThrottle::start_next_op() {
   bool done = false;
   while (!done) {
     if (m_ret != 0 || m_object_no >= m_end_object_no) {
-      return m_ret;
+      return;
     }
 
     uint64_t ono = m_object_no++;
@@ -68,7 +67,7 @@ int AsyncObjectThrottle::start_next_op() {
     if (r < 0) {
       m_ret = r;
       delete ctx;
-      return m_ret;
+      return;
     } else if (r > 0) {
       // op completed immediately
       delete ctx;
@@ -78,7 +77,6 @@ int AsyncObjectThrottle::start_next_op() {
     }
     m_prog_ctx.update_progress(ono, m_end_object_no);
   }
-  return 0;
 }
 
 } // namespace librbd
index fdbab5acf6ec6a335b56b6b4e94bed08e81c3c90..c7b5bf69a627ad4a2c37cc12e17b42b5a0187354 100644 (file)
@@ -46,7 +46,7 @@ public:
                      ProgressContext &prog_ctx, uint64_t object_no,
                      uint64_t end_object_no);
 
-  int start_ops(uint64_t max_concurrent);
+  void start_ops(uint64_t max_concurrent);
   virtual void finish_op(int r);
 
 private:
@@ -59,7 +59,7 @@ private:
   uint64_t m_current_ops;
   int m_ret;
 
-  int start_next_op();
+  void start_next_op();
 };
 
 } // namespace librbd
diff --git a/src/librbd/AsyncRequest.cc b/src/librbd/AsyncRequest.cc
new file mode 100644 (file)
index 0000000..332d883
--- /dev/null
@@ -0,0 +1,19 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "librbd/AsyncRequest.h"
+#include "librbd/internal.h"
+#include <boost/bind.hpp>
+
+namespace librbd
+{
+
+librados::AioCompletion *AsyncRequest::create_callback_completion() {
+  return librados::Rados::aio_create_completion(create_callback_context(),
+                                               NULL, rados_ctx_cb);
+}
+
+Context *AsyncRequest::create_callback_context() {
+  return new FunctionContext(boost::bind(&AsyncRequest::complete, this, _1));
+}
+
+} // namespace librbd
diff --git a/src/librbd/AsyncRequest.h b/src/librbd/AsyncRequest.h
new file mode 100644 (file)
index 0000000..3fdc85c
--- /dev/null
@@ -0,0 +1,62 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/Context.h"
+#include "include/rados/librados.hpp"
+
+namespace librbd {
+
+class ImageCtx;
+
+class AsyncRequest
+{
+public:
+  AsyncRequest(ImageCtx &image_ctx, Context *on_finish)
+    : m_image_ctx(image_ctx), m_on_finish(on_finish)
+  {
+  }
+
+  virtual ~AsyncRequest() {}
+
+  void complete(int r) {
+    if (should_complete(r)) {
+      m_on_finish->complete(r);
+      delete this;
+    }
+  }
+
+  virtual void send() = 0;
+
+protected:
+  ImageCtx &m_image_ctx;
+  Context *m_on_finish;
+
+  librados::AioCompletion *create_callback_completion();
+  Context *create_callback_context();
+
+  virtual bool should_complete(int r) = 0;
+};
+
+class C_AsyncRequest : public Context
+{
+public:
+  C_AsyncRequest(AsyncRequest *req)
+    : m_req(req)
+  {
+  }
+
+protected:
+  virtual void finish(int r) {
+    m_req->complete(r);
+  }
+
+private:
+  AsyncRequest *m_req;
+};
+
+} // namespace librbd
+
+#endif //CEPH_LIBRBD_ASYNC_REQUEST_H
diff --git a/src/librbd/AsyncResizeRequest.cc b/src/librbd/AsyncResizeRequest.cc
new file mode 100644 (file)
index 0000000..d0e3307
--- /dev/null
@@ -0,0 +1,216 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "librbd/AsyncResizeRequest.h"
+#include "librbd/AsyncTrimRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/dout.h"
+#include "common/errno.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::AsyncResizeRequest: "
+
+namespace librbd
+{
+
+bool AsyncResizeRequest::should_complete(int r)
+{
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
+    RWLock::WLocker l(m_image_ctx.md_lock);
+    if (m_image_ctx.size == m_new_size) {
+      m_image_ctx.size = m_original_size;
+    }
+    return true;
+  }
+
+  switch (m_state) {
+  case STATE_TRIM_IMAGE:
+    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
+    send_grow_object_map();
+    break;
+
+  case STATE_GROW_OBJECT_MAP:
+    ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
+    send_update_header();
+    break;
+
+  case STATE_UPDATE_HEADER:
+    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
+    if (send_shrink_object_map()) {
+      return true;
+    }
+    break;
+
+  case STATE_SHRINK_OBJECT_MAP:
+    ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
+    return true;
+
+  case STATE_FINISHED:
+    ldout(cct, 5) << "FINISHED" << dendl;
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+void AsyncResizeRequest::send() {
+  CephContext *cct = m_image_ctx.cct;
+  if (m_original_size == m_new_size) {
+    ldout(cct, 2) << this << " no change in size (" << m_original_size
+                 << " -> " << m_new_size << ")" << dendl; 
+    m_state = STATE_FINISHED;
+    complete(0);
+  } else if (m_new_size > m_original_size) {
+    ldout(cct, 2) << this << " expanding image (" << m_original_size
+                 << " -> " << m_new_size << ")" << dendl; 
+    send_grow_object_map();
+  } else {
+    ldout(cct, 2) << this << " shrinking image (" << m_original_size
+                 << " -> " << m_new_size << ")" << dendl; 
+    send_trim_image();
+  }
+}
+
+void AsyncResizeRequest::send_trim_image() {
+  ldout(m_image_ctx.cct, 5) << this << " send_trim_image: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_TRIM_IMAGE;
+
+  {
+    // update in-memory size to clip concurrent IO operations
+    RWLock::WLocker l(m_image_ctx.md_lock);
+    m_image_ctx.size = m_new_size;
+  }
+
+  AsyncTrimRequest *req = new AsyncTrimRequest(m_image_ctx,
+                                              create_callback_context(),
+                                              m_original_size, m_new_size,
+                                              m_prog_ctx);
+  req->send();
+}
+
+void AsyncResizeRequest::send_grow_object_map() {
+  bool lost_exclusive_lock = false;
+  bool object_map_enabled = true;
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    RWLock::RLocker l2(m_image_ctx.md_lock);
+    if (m_image_ctx.object_map == NULL) {
+      object_map_enabled = false;
+    } else { 
+      ldout(m_image_ctx.cct, 5) << this << " send_grow_object_map: "
+                                << " original_size=" << m_original_size
+                                << " new_size=" << m_new_size << dendl;
+      m_state = STATE_GROW_OBJECT_MAP;
+
+      if (m_image_ctx.image_watcher->is_lock_supported() &&
+         !m_image_ctx.image_watcher->is_lock_owner()) {
+       ldout(m_image_ctx.cct, 1) << "lost exclusive lock during grow object map" << dendl;
+       lost_exclusive_lock = true;
+      } else {
+       m_image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT,
+                                          create_callback_context());
+       object_map_enabled = true;
+      }
+    }
+  }
+
+  if (!object_map_enabled) {
+    send_update_header();
+  } else if (lost_exclusive_lock) {
+    // only complete when not holding locks
+    complete(-ERESTART);
+  }
+}
+
+bool AsyncResizeRequest::send_shrink_object_map() {
+  bool lost_exclusive_lock = false;
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    RWLock::RLocker l2(m_image_ctx.md_lock);
+    if (m_image_ctx.object_map == NULL ||
+       m_new_size > m_original_size) {
+      return true;
+    }
+
+    ldout(m_image_ctx.cct, 5) << this << " send_shrink_object_map: "
+                             << " original_size=" << m_original_size
+                             << " new_size=" << m_new_size << dendl;
+    m_state = STATE_SHRINK_OBJECT_MAP;
+
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+        !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during shrink object map" << dendl;
+      lost_exclusive_lock = true;
+    } else {
+      m_image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT,
+                                        create_callback_context());
+    }
+  }
+
+  if (lost_exclusive_lock) {
+    // only complete when not holding locks
+    complete(-ERESTART);
+  }
+  return false;
+}
+
+void AsyncResizeRequest::send_update_header() {
+  bool lost_exclusive_lock = false;
+
+  ldout(m_image_ctx.cct, 5) << this << " send_update_header: "
+                            << " original_size=" << m_original_size
+                            << " new_size=" << m_new_size << dendl;
+  m_state = STATE_UPDATE_HEADER;
+
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock); 
+    RWLock::WLocker l2(m_image_ctx.md_lock);
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+       !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during header update" << dendl;
+      lost_exclusive_lock = true;
+    } else {
+      m_image_ctx.size = m_new_size;
+
+      librados::ObjectWriteOperation op;
+      if (m_image_ctx.old_format) {
+       // rewrite header
+       bufferlist bl;
+       m_image_ctx.header.image_size = m_new_size;
+       bl.append((const char *)&m_image_ctx.header, sizeof(m_image_ctx.header));
+       op.write(0, bl);
+      } else {
+       if (m_image_ctx.image_watcher->is_lock_supported()) {
+         m_image_ctx.image_watcher->assert_header_locked(&op);
+       }
+       cls_client::set_size(&op, m_new_size);
+      }
+
+      librados::AioCompletion *rados_completion = create_callback_completion();
+      int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                            rados_completion, &op);
+      assert(r == 0);
+      rados_completion->release();
+    }
+  }
+
+  if (lost_exclusive_lock) {
+    // only complete when not holding locks
+    complete(-ERESTART);
+  }
+}
+
+} // namespace librbd
diff --git a/src/librbd/AsyncResizeRequest.h b/src/librbd/AsyncResizeRequest.h
new file mode 100644 (file)
index 0000000..0e72a10
--- /dev/null
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_RESIZE_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_RESIZE_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+class AsyncResizeRequest : public AsyncRequest
+{
+public:
+  AsyncResizeRequest(ImageCtx &image_ctx, Context *on_finish,
+                    uint64_t original_size, uint64_t new_size,
+                    ProgressContext &prog_ctx)
+    : AsyncRequest(image_ctx, on_finish),
+      m_original_size(original_size), m_new_size(new_size),
+      m_prog_ctx(prog_ctx)
+  {
+  }
+
+  virtual void send();
+
+protected:
+  /**
+   * Resize goes through the following state machine to resize the image
+   * and update the object map:
+   *
+   * <start> ----> STATE_FINISHED --------------------------------\
+   *  |  .                                                        |
+   *  |  . . . . . . . . . . . . . . . . . .                      |
+   *  |                                    .                      |
+   *  |                                    v                      |
+   *  |---> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER -------|
+   *  |                                                           |
+   *  |                                                           |
+   *  \---> STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . .  |
+   *                                       |                   .  |
+   *                                       |                   .  |
+   *                                       v                   v  v
+   *                            STATE_SHRINK_OBJECT_MAP ---> <finish>
+   *
+   * The _OBJECT_MAP states are skipped if the object map isn't enabled.
+   * The state machine will immediately transition to _FINISHED if there
+   * are no objects to trim.
+   */ 
+  enum State {
+    STATE_TRIM_IMAGE,
+    STATE_GROW_OBJECT_MAP,
+    STATE_UPDATE_HEADER,
+    STATE_SHRINK_OBJECT_MAP,
+    STATE_FINISHED
+  };
+
+  State m_state;
+  uint64_t m_original_size;
+  uint64_t m_new_size;
+  ProgressContext &m_prog_ctx;
+
+  virtual bool should_complete(int r);
+
+  void send_trim_image();
+  void send_grow_object_map();
+  bool send_shrink_object_map();
+  void send_update_header();
+
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_ASYNC_RESIZE_REQUEST_H
diff --git a/src/librbd/AsyncTrimRequest.cc b/src/librbd/AsyncTrimRequest.cc
new file mode 100644 (file)
index 0000000..610d2d0
--- /dev/null
@@ -0,0 +1,298 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "librbd/AsyncTrimRequest.h"
+#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AioRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/internal.h"
+#include "librbd/ObjectMap.h"
+#include "common/ContextCompletion.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "osdc/Striper.h"
+
+#include <boost/bind.hpp>
+#include <boost/lambda/bind.hpp>
+#include <boost/lambda/construct.hpp>
+#include <boost/scope_exit.hpp>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::AsyncTrimRequest: "
+
+namespace librbd
+{
+
+class AsyncTrimObjectContext : public C_AsyncObjectThrottle {
+public:
+  AsyncTrimObjectContext(AsyncObjectThrottle &throttle, ImageCtx *image_ctx,
+                        uint64_t object_no)
+    : C_AsyncObjectThrottle(throttle), m_image_ctx(*image_ctx),
+      m_object_no(object_no)
+  {
+  }
+
+  virtual int send() {
+    {
+      RWLock::RLocker l(m_image_ctx.md_lock);
+      if (m_image_ctx.object_map != NULL &&
+          !m_image_ctx.object_map->object_may_exist(m_object_no)) {
+        return 1;
+      }
+    }
+
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+        !m_image_ctx.image_watcher->is_lock_owner()) {
+      return -ERESTART;
+    }
+
+    string oid = m_image_ctx.get_object_name(m_object_no);
+    ldout(m_image_ctx.cct, 10) << "removing " << oid << dendl;
+
+    librados::AioCompletion *rados_completion =
+      librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
+    int r = m_image_ctx.data_ctx.aio_remove(oid, rados_completion);
+    assert(r == 0);
+    rados_completion->release();
+    return 0;
+  }
+
+private:
+  ImageCtx &m_image_ctx;
+  uint64_t m_object_no;
+};
+
+AsyncTrimRequest::AsyncTrimRequest(ImageCtx &image_ctx, Context *on_finish,
+                                  uint64_t original_size, uint64_t new_size,
+                                  ProgressContext &prog_ctx)
+  : AsyncRequest(image_ctx, on_finish), m_new_size(new_size), m_prog_ctx(prog_ctx)
+{
+  uint64_t period = m_image_ctx.get_stripe_period();
+  uint64_t new_num_periods = ((m_new_size + period - 1) / period);
+  m_delete_off = MIN(new_num_periods * period, original_size);
+  // first object we can delete free and clear
+  m_delete_start = new_num_periods * m_image_ctx.get_stripe_count();
+  m_num_objects = Striper::get_num_objects(m_image_ctx.layout, original_size);
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " trim image " << original_size << " -> "
+                << m_new_size << " periods " << new_num_periods
+                 << " discard to offset " << m_delete_off
+                 << " delete objects " << m_delete_start
+                 << " to " << m_num_objects << dendl;
+}
+
+
+bool AsyncTrimRequest::should_complete(int r)
+{
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 5) << this << " should_complete: r=" << r << dendl;
+  if (r < 0) {
+    lderr(cct) << "trim encountered an error: " << cpp_strerror(r) << dendl;
+    return true;
+  }
+
+  switch (m_state) {
+  case STATE_PRE_REMOVE:
+    ldout(cct, 5) << " PRE_REMOVE" << dendl;
+    send_remove_objects();
+    break; 
+
+  case STATE_REMOVE_OBJECTS:
+    ldout(cct, 5) << " REMOVE_OBJECTS" << dendl;
+    if (send_post_remove()) {
+      return true;
+    }
+    break;
+
+  case STATE_POST_REMOVE:
+    ldout(cct, 5) << " POST_OBJECTS" << dendl;
+    if (send_clean_boundary()) {
+      return true;
+    }
+    break;
+
+  case STATE_CLEAN_BOUNDARY:
+    ldout(cct, 5) << "CLEAN_BOUNDARY" << dendl;
+    return true;
+
+  case STATE_FINISHED:
+    ldout(cct, 5) << "FINISHED" << dendl;
+    return true;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
+  }
+  return false;
+}
+
+void AsyncTrimRequest::send() {
+  if (m_delete_start < m_num_objects) {
+    send_pre_remove();
+  } else {
+    bool finished = send_clean_boundary();
+    if (finished) {
+      m_state = STATE_FINISHED;
+      complete(0);
+    }
+  }
+}
+
+void AsyncTrimRequest::send_remove_objects() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(m_image_ctx.cct, 5) << this << " send_remove_objects: "
+                           << " delete_start=" << m_delete_start
+                           << " num_objects=" << m_num_objects << dendl;
+  m_state = STATE_REMOVE_OBJECTS;
+
+  Context *ctx = create_callback_context();
+  AsyncObjectThrottle::ContextFactory context_factory(
+    boost::lambda::bind(boost::lambda::new_ptr<AsyncTrimObjectContext>(),
+      boost::lambda::_1, &m_image_ctx, boost::lambda::_2));
+  AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
+    context_factory, ctx, m_prog_ctx, m_delete_start, m_num_objects);
+  throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
+}
+
+void AsyncTrimRequest::send_pre_remove() {
+  bool lost_exclusive_lock = false;
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    RWLock::RLocker l2(m_image_ctx.md_lock);
+    if (m_image_ctx.object_map == NULL) {
+      send_remove_objects();
+      return;
+    }
+
+    ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
+                             << " delete_start=" << m_delete_start
+                             << " num_objects=" << m_num_objects << dendl;
+    m_state = STATE_PRE_REMOVE;
+
+    if (!m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+      lost_exclusive_lock = true;
+    } else {
+      // flag the objects as pending deletion
+      m_image_ctx.object_map->aio_update(
+       m_delete_start, m_num_objects, OBJECT_PENDING, OBJECT_EXISTS,
+       create_callback_context());
+    }
+  }
+
+  if (lost_exclusive_lock) {
+    complete(-ERESTART);
+  }
+}
+
+bool AsyncTrimRequest::send_post_remove() {
+  bool lost_exclusive_lock = false;
+  {
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    RWLock::RLocker l2(m_image_ctx.md_lock);
+    if (m_image_ctx.object_map == NULL) {
+      return send_clean_boundary();
+    }
+
+    ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
+                             << " delete_start=" << m_delete_start
+                             << " num_objects=" << m_num_objects << dendl;
+    m_state = STATE_POST_REMOVE;
+
+    if (!m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+    } else {
+      // flag the pending objects as removed
+      m_image_ctx.object_map->aio_update(
+       m_delete_start, m_num_objects, OBJECT_NONEXISTENT, OBJECT_PENDING,
+       create_callback_context());
+    }
+  }
+
+  if (lost_exclusive_lock) {
+    complete(-ERESTART);
+  }
+  return false;
+}
+
+bool AsyncTrimRequest::send_clean_boundary() {
+  CephContext *cct = m_image_ctx.cct;
+  if (m_delete_off <= m_new_size) {
+    return true;
+  }
+
+  bool lost_exclusive_lock = false;
+  ContextCompletion *completion = NULL;
+  {
+    ldout(m_image_ctx.cct, 5) << this << " send_clean_boundary: "
+                             << " delete_start=" << m_delete_start
+                             << " num_objects=" << m_num_objects << dendl;
+    m_state = STATE_CLEAN_BOUNDARY;
+
+    RWLock::RLocker l(m_image_ctx.owner_lock);
+    if (m_image_ctx.image_watcher->is_lock_supported() &&
+       !m_image_ctx.image_watcher->is_lock_owner()) {
+      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+      lost_exclusive_lock = true;
+    } else {
+      ::SnapContext snapc;
+      uint64_t parent_overlap;
+      {
+        RWLock::RLocker l2(m_image_ctx.snap_lock);
+        RWLock::RLocker l3(m_image_ctx.parent_lock);
+        snapc = m_image_ctx.snapc;
+        parent_overlap = m_image_ctx.parent_md.overlap;
+      }
+
+      // discard the weird boundary, if any
+      vector<ObjectExtent> extents;
+      Striper::file_to_extents(cct, m_image_ctx.format_string,
+                              &m_image_ctx.layout, m_new_size,
+                              m_delete_off - m_new_size, 0, extents);
+
+      completion = new ContextCompletion(create_callback_context(), true);
+      for (vector<ObjectExtent>::iterator p = extents.begin();
+           p != extents.end(); ++p) {
+        ldout(cct, 20) << " ex " << *p << dendl;
+        Context *req_comp = new C_ContextCompletion(*completion);
+
+        // reverse map this object extent onto the parent
+        vector<pair<uint64_t,uint64_t> > objectx;
+        Striper::extent_to_file(cct, &m_image_ctx.layout, p->objectno, 0,
+                               m_image_ctx.layout.fl_object_size, objectx);
+        uint64_t object_overlap =
+         m_image_ctx.prune_parent_extents(objectx, parent_overlap);
+
+        AbstractWrite *req;
+        if (p->offset == 0) {
+          req = new AioRemove(&m_image_ctx, p->oid.name, p->objectno, objectx,
+                              object_overlap, snapc, CEPH_NOSNAP, req_comp);
+        } else {
+          req = new AioTruncate(&m_image_ctx, p->oid.name, p->objectno, p->offset,
+                                objectx, object_overlap, snapc, CEPH_NOSNAP,
+                                req_comp);
+        }
+        int r = req->send();
+        if (r < 0) {
+          req_comp->complete(r);
+          delete req;
+          break;
+        }
+      }
+    }
+
+  }
+
+  if (lost_exclusive_lock) {
+    complete(-ERESTART);
+  } else if (completion != NULL) {
+    completion->finish_adding_requests();
+  }
+  return false;
+}
+
+} // namespace librbd
diff --git a/src/librbd/AsyncTrimRequest.h b/src/librbd/AsyncTrimRequest.h
new file mode 100644 (file)
index 0000000..7a89a11
--- /dev/null
@@ -0,0 +1,77 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_LIBRBD_ASYNC_TRIM_REQUEST_H
+#define CEPH_LIBRBD_ASYNC_TRIM_REQUEST_H
+
+#include "librbd/AsyncRequest.h"
+
+namespace librbd
+{
+
+class ImageCtx;
+class ProgressContext;
+
+class AsyncTrimRequest : public AsyncRequest
+{
+public:
+  AsyncTrimRequest(ImageCtx &image_ctx, Context *on_finish,
+                  uint64_t original_size, uint64_t new_size,
+                  ProgressContext &prog_ctx);
+
+  virtual void send();
+
+protected:
+  /**
+   * Trim goes through the following state machine to remove whole objects,
+   * clean partially trimmed objects, and update the object map:
+   *
+   *     <start> . . . . > STATE_FINISHED . . . . . . . . .
+   *      |   .                                           .
+   *      |   . . . . . . . . . . . .                     .
+   *      |                         .                     .
+   *      v                         v                     .
+   * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS           .
+   *                                |   .   .             .
+   *        /-----------------------/   .   . . . . . .   .
+   *        |                           .             .   .
+   *        v                           v             v   v
+   * STATE_POST_REMOVE --> STATE_CLEAN_BOUNDARY ---> <finish>
+   *        .                                           ^
+   *        .                                           .
+   *        . . . . . . . . . . . . . . . . . . . . . . .
+   *
+   * The _PRE_REMOVE/_POST_REMOVE states are skipped if the object map
+   * isn't enabled. The _REMOVE_OBJECTS state is skipped if no whole objects
+   * are removed.  The _CLEAN_BOUNDARY state is skipped if no boundary
+   * objects are cleaned.  The state machine will immediately transition
+   * to _FINISHED state if there are no bytes to trim.
+   */ 
+
+  enum State {
+    STATE_PRE_REMOVE,
+    STATE_REMOVE_OBJECTS,
+    STATE_POST_REMOVE,
+    STATE_CLEAN_BOUNDARY,
+    STATE_FINISHED
+  };
+
+  virtual bool should_complete(int r);
+
+  State m_state;
+
+private:
+  uint64_t m_delete_start;
+  uint64_t m_num_objects;
+  uint64_t m_delete_off;
+  uint64_t m_new_size;
+  ProgressContext &m_prog_ctx;
+
+  void send_remove_objects();
+  void send_pre_remove();
+  bool send_post_remove();
+  bool send_clean_boundary();
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_ASYNC_TRIM_REQUEST_H
index 8328fa556ec6e4c4aa4a0a35bac523bed9afbeb6..0a6df6a60c3d7cf659a55e4c69db99acd16f087d 100644 (file)
@@ -7,12 +7,14 @@
 #include "common/Mutex.h"
 
 #include "librbd/AioCompletion.h"
-#include "librbd/ImageCtx.h"
-
 #include "librbd/AioRequest.h"
 #include "librbd/CopyupRequest.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
 #include "librbd/ObjectMap.h"
 
+#include <boost/bind.hpp>
+
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
 #define dout_prefix *_dout << "librbd::CopyupRequest: "
@@ -23,22 +25,12 @@ namespace librbd {
                                uint64_t objectno,
                               vector<pair<uint64_t,uint64_t> >& image_extents)
     : m_ictx(ictx), m_oid(oid), m_object_no(objectno),
-      m_image_extents(image_extents)
+      m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT)
   {
   }
 
   CopyupRequest::~CopyupRequest() {
-    assert(m_ictx->copyup_list_lock.is_locked());
     assert(m_pending_requests.empty());
-
-    map<uint64_t, CopyupRequest*>::iterator it =
-      m_ictx->copyup_list.find(m_object_no);
-    assert(it != m_ictx->copyup_list.end());
-    m_ictx->copyup_list.erase(it);
-
-    if (m_ictx->copyup_list.empty()) {
-      m_ictx->copyup_list_cond.Signal();
-    }
   }
 
   ceph::bufferlist& CopyupRequest::get_copyup_data() {
@@ -50,7 +42,11 @@ namespace librbd {
     m_pending_requests.push_back(req);
   }
 
-  void CopyupRequest::complete_all(int r) {
+  bool CopyupRequest::complete_requests(int r) {
+    if (m_pending_requests.empty()) {
+      return false;
+    }
+
     while (!m_pending_requests.empty()) {
       vector<AioRequest *>::iterator it = m_pending_requests.begin();
       AioRequest *req = *it;
@@ -59,12 +55,12 @@ namespace librbd {
       req->complete(r);
       m_pending_requests.erase(it);
     }
+    return true;
   }
 
-  void CopyupRequest::send_copyup(int r) {
+  void CopyupRequest::send_copyup() {
     ldout(m_ictx->cct, 20) << __func__ << " " << this
-                          << ": oid " << m_oid
-                          << ", r " << r << dendl;
+                          << ": oid " << m_oid << dendl;
 
     m_ictx->snap_lock.get_read();
     ::SnapContext snapc = m_ictx->snapc;
@@ -73,20 +69,6 @@ namespace librbd {
     std::vector<librados::snap_t> snaps;
     snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
 
-    {
-      RWLock::RLocker l(m_ictx->md_lock);
-      if (m_ictx->object_map != NULL) {
-       r = m_ictx->object_map->update(m_object_no, OBJECT_EXISTS,
-                                      boost::optional<uint8_t>());
-       if (r < 0) {
-         lderr(m_ictx->cct) << __func__ << " " << this
-                            << ": failed to update object map:"
-                            << cpp_strerror(r) << dendl;
-         return;
-       }
-      }
-    }
-
     librados::ObjectWriteOperation copyup_op;
     copyup_op.exec("rbd", "copyup", m_copyup_data);
 
@@ -96,55 +78,126 @@ namespace librbd {
     comp->release();
   }
 
-  void CopyupRequest::read_from_parent()
+  void CopyupRequest::send()
   {
+    m_state = STATE_READ_FROM_PARENT;
     AioCompletion *comp = aio_create_completion_internal(
-      this, &CopyupRequest::read_from_parent_cb);
+      create_callback_context(), rbd_ctx_cb);
+
     ldout(m_ictx->cct, 20) << __func__ << " " << this
                            << ": completion " << comp
                           << ", oid " << m_oid
                            << ", extents " << m_image_extents
                            << dendl;
-
     int r = aio_read(m_ictx->parent, m_image_extents, NULL, &m_copyup_data,
                     comp, 0);
     if (r < 0) {
       comp->release();
-      delete this;
+
+      remove_from_list();
+      complete_requests(r);
     }
   }
 
-  void CopyupRequest::queue_read_from_parent()
+  void CopyupRequest::queue_send()
   {
     // TODO: once the ObjectCacher allows reentrant read requests, the finisher
     // should be eliminated
     ldout(m_ictx->cct, 20) << __func__ << " " << this
                           << ": oid " << m_oid << " "
                           << ", extents " << m_image_extents << dendl;
-    C_ReadFromParent *ctx = new C_ReadFromParent(this);
+    FunctionContext *ctx = new FunctionContext(
+      boost::bind(&CopyupRequest::send, this));
     m_ictx->copyup_finisher->queue(ctx);
   }
 
-  void CopyupRequest::read_from_parent_cb(completion_t cb, void *arg)
+  void CopyupRequest::complete(int r)
   {
-    CopyupRequest *req = reinterpret_cast<CopyupRequest *>(arg);
-    AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
-
-    ldout(req->m_ictx->cct, 20) << __func__ << " " << req
-                               << ": oid " << req->m_oid
-                               << ", extents " << req->m_image_extents << dendl;
-
-    // If this entry is created by a read request, then copyup operation will
-    // be performed asynchronously. Perform cleaning up from copyup callback.
-    // If this entry is created by a write request, then copyup operation will
-    // be performed synchronously by AioWrite. After extracting data, perform
-    // cleaning up here
-    Mutex::Locker l(req->m_ictx->copyup_list_lock);
-    if (req->m_pending_requests.empty()) {
-      req->send_copyup(comp->get_return_value());
-    } else {
-      req->complete_all(comp->get_return_value());
+    if (should_complete(r)) {
+      delete this;
     }
-    delete req;
+  }
+
+  bool CopyupRequest::should_complete(int r)
+  {
+    CephContext *cct = m_ictx->cct;
+    ldout(cct, 20) << __func__ << " "
+                  << ": oid " << m_oid
+                  << ", extents " << m_image_extents
+                  << ", r " << r << dendl;
+
+    switch (m_state) {
+    case STATE_READ_FROM_PARENT:
+      ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
+      remove_from_list();
+      if (complete_requests(r)) {
+       // pending write operation: it will handle object map / copyup
+       return true;
+      } else if (r < 0) {
+       // nothing to copyup
+       return true;
+      } else if (send_object_map()) {
+       return true; 
+      }
+      break;
+
+    case STATE_OBJECT_MAP:
+      ldout(cct, 20) << "OBJECT_MAP" << dendl;
+      if (r == 0) {
+       send_copyup();
+      }
+      return true;
+
+    default:
+      lderr(cct) << "invalid state: " << m_state << dendl;
+      assert(false);
+      break;
+    }
+    return false;
+  }
+
+  void CopyupRequest::remove_from_list()
+  {
+    Mutex::Locker l(m_ictx->copyup_list_lock);
+
+    map<uint64_t, CopyupRequest*>::iterator it =
+      m_ictx->copyup_list.find(m_object_no);
+    assert(it != m_ictx->copyup_list.end());
+    m_ictx->copyup_list.erase(it);
+
+    if (m_ictx->copyup_list.empty()) {
+      m_ictx->copyup_list_cond.Signal();
+    }
+  }
+
+  bool CopyupRequest::send_object_map() {
+    bool object_map_enabled = true;
+    {
+      RWLock::RLocker l(m_ictx->owner_lock);
+      RWLock::RLocker l2(m_ictx->md_lock);
+      if (m_ictx->object_map == NULL) {
+       object_map_enabled = false;
+      } else if (!m_ictx->image_watcher->is_lock_owner()) {
+       ldout(m_ictx->cct, 20) << "exclusive lock not held for copy-on-read"
+                              << dendl;
+       return true; 
+      } else {
+       m_state = STATE_OBJECT_MAP;
+        m_ictx->object_map->aio_update(m_object_no, OBJECT_EXISTS,
+                                      boost::optional<uint8_t>(),
+                                      create_callback_context());
+      }
+    }
+
+    if (!object_map_enabled) {
+      send_copyup();
+      return true;
+    }        
+    return false;
+  }
+
+  Context *CopyupRequest::create_callback_context()
+  {
+    return new FunctionContext(boost::bind(&CopyupRequest::complete, this, _1));
   }
 }
index 107189f1f69b312eafc28cd0fdbebbd3e5617344..6ec13071bbb9a078a1047e82ef07eebf7a61a68d 100644 (file)
@@ -21,33 +21,50 @@ namespace librbd {
 
     ceph::bufferlist& get_copyup_data();
     void append_request(AioRequest *req);
-    void read_from_parent();
-    void queue_read_from_parent();
 
-  private:
-    class C_ReadFromParent : public Context {
-    public:
-      C_ReadFromParent(CopyupRequest *c) : m_req(c) {}
-
-      virtual void finish(int r) {
-        m_req->read_from_parent();
-      }
+    void send();
+    void queue_send();
 
-    private:
-      CopyupRequest *m_req;
+  private:
+    /**
+     * Copyup requests go through the following state machine to read from the
+     * parent image, update the object map, and copyup the object:
+     *
+     * <start>
+     *    |
+     *    v
+     * STATE_READ_FROM_PARENT ---> STATE_OBJECT_MAP
+     *    .                           |
+     *    . . . . . . . . . . . . .   |
+     *                            .   |
+     *                            v   v
+     *                           <finish>
+     * The _OBJECT_MAP state is skipped if the object map isn't enabled.
+     */
+    enum State {
+      STATE_READ_FROM_PARENT,
+      STATE_OBJECT_MAP
     };
 
     ImageCtx *m_ictx;
     std::string m_oid;
     uint64_t m_object_no;
     vector<pair<uint64_t,uint64_t> > m_image_extents;
+    State m_state;
     ceph::bufferlist m_copyup_data;
     vector<AioRequest *> m_pending_requests;
 
-    void complete_all(int r);
-    void send_copyup(int r);
-    static void read_from_parent_cb(completion_t cb, void *arg);
+    bool complete_requests(int r);
+
+    void complete(int r);
+    bool should_complete(int r);
+
+    void remove_from_list();
+
+    bool send_object_map(); 
+    void send_copyup();
 
+    Context *create_callback_context();
   };
 }
 
index 9fa8689b4ad01f950673ba0f4f9ad9799e5a9a45..0bd2372e111474dd20d85d66c1d475589e42e1ef 100644 (file)
@@ -505,7 +505,7 @@ namespace librbd {
       onfinish->complete(r);
   }
 
-  void ImageCtx::write_to_cache(object_t o, bufferlist& bl, size_t len,
+  void ImageCtx::write_to_cache(object_t o, const bufferlist& bl, size_t len,
                                uint64_t off, Context *onfinish) {
     snap_lock.get_read();
     ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(snapc, bl,
index c7aa8cc3e36cf224d7f19631fef33be18f39098e..16a46aaf13f5eecf12b2ba8eec1c068455078e06 100644 (file)
@@ -163,8 +163,8 @@ namespace librbd {
                           uint64_t *overlap) const;
     void aio_read_from_cache(object_t o, uint64_t object_no, bufferlist *bl,
                             size_t len, uint64_t off, Context *onfinish);
-    void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off,
-                       Context *onfinish);
+    void write_to_cache(object_t o, const bufferlist& bl, size_t len,
+                       uint64_t off, Context *onfinish);
     int read_from_cache(object_t o, uint64_t object_no, bufferlist *bl,
                        size_t len, uint64_t off);
     void user_flushed();
index 2880bedb69123338a74d74bd69beeb48e19d0514..2836c696da0365e9f78ef47728d250a4712f0c74 100644 (file)
@@ -1,7 +1,11 @@
 librbd_internal_la_SOURCES = \
        librbd/AioCompletion.cc \
        librbd/AioRequest.cc \
+       librbd/AsyncFlattenRequest.cc \
        librbd/AsyncObjectThrottle.cc \
+       librbd/AsyncRequest.cc \
+       librbd/AsyncResizeRequest.cc \
+       librbd/AsyncTrimRequest.cc \
        librbd/CopyupRequest.cc \
        librbd/ImageCtx.cc \
        librbd/ImageWatcher.cc \
@@ -38,7 +42,11 @@ lib_LTLIBRARIES += librbd.la
 noinst_HEADERS += \
        librbd/AioCompletion.h \
        librbd/AioRequest.h \
+       librbd/AsyncFlattenRequest.h \
        librbd/AsyncObjectThrottle.h \
+       librbd/AsyncRequest.h \
+       librbd/AsyncResizeRequest.h \
+       librbd/AsyncTrimRequest.h \
        librbd/CopyupRequest.h \
        librbd/ImageCtx.h \
        librbd/ImageWatcher.h \
index ada967bd87de137a15b3bdaf5f3e5247523aa899..e4c736e2b93950889c98909ab1ea5ec27138da7c 100644 (file)
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 #include "librbd/ObjectMap.h"
 #include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "common/dout.h"
 #include "common/errno.h"
@@ -131,8 +132,8 @@ int ObjectMap::refresh()
   ldout(cct, 20) << "refreshed object map: " << object_map.size()
                  << dendl;
   
-  uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
-                                              m_image_ctx.get_current_size());
+  uint64_t num_objs = Striper::get_num_objects(
+    m_image_ctx.layout, m_image_ctx.get_image_size(m_image_ctx.snap_id));
   if (object_map.size() != num_objs) {
     // resize op might have been interrupted
     lderr(cct) << "incorrect object map size: " << object_map.size()
@@ -142,115 +143,205 @@ int ObjectMap::refresh()
   }
   return 0;
 }
-int ObjectMap::resize(uint8_t default_object_state)
-{
-  if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
-    return 0;
-  }
 
-  RWLock::WLocker l(m_image_ctx.object_map_lock);
-  CephContext *cct = m_image_ctx.cct;
-  uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
-                                               m_image_ctx.get_current_size());
-  ldout(cct, 20) << "resizing object map: " << num_objs << dendl;
-  librados::ObjectWriteOperation op;
-  rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
-  cls_client::object_map_resize(&op, num_objs, default_object_state);
-  int r = m_image_ctx.data_ctx.operate(object_map_name(m_image_ctx.id), &op);
-  if (r == -EBUSY) {
-    lderr(cct) << "object map lock not owned by client" << dendl;
-    return r;
-  } else if (r < 0) {
-    lderr(cct) << "error resizing object map: size=" << num_objs << ", "
-               << "state=" << default_object_state << ", "
-               << "error=" << cpp_strerror(r) << dendl;
-    invalidate();
-    return 0;
-  }
 
-  size_t orig_object_map_size = object_map.size();
-  object_map.resize(num_objs);
-  for (uint64_t i = orig_object_map_size; i < object_map.size(); ++i) {
-    object_map[i] = default_object_state;
-  }
-  return 0;
+void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state,
+                          Context *on_finish) {
+  assert((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) != 0);
+  assert(m_image_ctx.owner_lock.is_locked());
+  assert(m_image_ctx.image_watcher->is_lock_owner());
+
+  ResizeRequest *req = new ResizeRequest(
+    m_image_ctx, new_size, default_object_state, on_finish);
+  req->send();
 }
 
-int ObjectMap::update(uint64_t object_no, uint8_t new_state,
-                     const boost::optional<uint8_t> &current_state)
+void ObjectMap::aio_update(uint64_t object_no, uint8_t new_state,
+                          const boost::optional<uint8_t> &current_state,
+                          Context *on_finish)
 {
-  return update(object_no, object_no + 1, new_state, current_state);
+  aio_update(object_no, object_no + 1, new_state, current_state, on_finish);
 }
 
-int ObjectMap::update(uint64_t start_object_no,
-                      uint64_t end_object_no, uint8_t new_state,
-                      const boost::optional<uint8_t> &current_state)
+void ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
+                          uint8_t new_state,
+                           const boost::optional<uint8_t> &current_state,
+                           Context *on_finish)
 {
-  if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
-    return 0;
-  }
-
-  RWLock::WLocker l(m_image_ctx.object_map_lock);
-  CephContext *cct = m_image_ctx.cct;
-  assert(start_object_no <= end_object_no);
-  assert(end_object_no <= object_map.size() ||
-         (m_image_ctx.flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0);
-  if (end_object_no > object_map.size()) {
-    ldout(cct, 20) << "skipping update of invalid object map" << dendl;
-    return 0;
-  }
+  assert((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) != 0);
+  assert(m_image_ctx.owner_lock.is_locked());
+  assert(m_image_ctx.image_watcher->is_lock_owner());
 
   bool update_required = false;
-  for (uint64_t object_no = start_object_no; object_no < end_object_no;
-       ++object_no) {
-    if ((!current_state || object_map[object_no] == *current_state) &&
-        object_map[object_no] != new_state) {
-      update_required = true;
-      break;
+  {
+    RWLock::WLocker l(m_image_ctx.object_map_lock);
+    assert(start_object_no < end_object_no);
+  
+    CephContext *cct = m_image_ctx.cct;
+    if (end_object_no > object_map.size()) {
+      ldout(cct, 20) << "skipping update of invalid object map" << dendl;
+      return;
+    }
+  
+    for (uint64_t object_no = start_object_no; object_no < end_object_no;
+         ++object_no) {
+      if ((!current_state || object_map[object_no] == *current_state) &&
+          object_map[object_no] != new_state) {
+        update_required = true;
+        break;
+      }
+    }
+
+    if (update_required) {
+      UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no,
+                                            end_object_no, new_state,
+                                            current_state, on_finish);
+      req->send();
     }
   }
 
   if (!update_required) {
-    return 0;
+    on_finish->complete(0);
   }
+}
 
-  ldout(cct, 20) << "updating object map: [" << start_object_no << ","
-                 << end_object_no << ") = "
-                 << static_cast<uint32_t>(new_state) << dendl;
+void ObjectMap::invalidate() {
+  CephContext *cct = m_image_ctx.cct;
+  lderr(cct) << this << " invalidating object map" << dendl;
+  m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID;
 
   librados::ObjectWriteOperation op;
-  rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
-  cls_client::object_map_update(&op, start_object_no, end_object_no,
-                                new_state, current_state);
-  int r = m_image_ctx.data_ctx.operate(object_map_name(m_image_ctx.id), &op);
-  if (r == -EBUSY) {
-    lderr(cct) << "object map lock not owned by client" << dendl;
-    return r;
-  } else if (r < 0) {
-    lderr(cct) << "object map update failed: " << cpp_strerror(r) << dendl;
-    invalidate();
-  } else {
-    for (uint64_t object_no = start_object_no; object_no < end_object_no;
-         ++object_no) {
-      if (!current_state || object_map[object_no] == *current_state) {
-        object_map[object_no] = new_state;
+  cls_client::set_flags(&op, m_image_ctx.flags, RBD_FLAG_OBJECT_MAP_INVALID);
+
+  int r = m_image_ctx.md_ctx.operate(m_image_ctx.header_oid, &op);
+  if (r < 0) {
+    lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r)
+              << dendl;
+  }
+}
+
+bool ObjectMap::Request::should_complete(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << " should_complete: r=" << r << dendl;
+
+  switch (m_state)
+  {
+  case STATE_REQUEST:
+    if (r == -EBUSY) {
+      lderr(cct) << "object map lock not owned by client" << dendl;
+      return true;
+    } else if (r < 0) {
+      lderr(cct) << "failed to update object map: " << cpp_strerror(r)
+                << dendl;
+      invalidate();
+      return false;
+    }
+
+    {
+      RWLock::RLocker l(m_image_ctx.md_lock);
+      RWLock::WLocker l2(m_image_ctx.object_map_lock);
+      ObjectMap *object_map = m_image_ctx.object_map;
+      if (object_map != NULL) {
+       finish(object_map);
       }
     }
+    return true;
+
+  case STATE_INVALIDATE:
+    ldout(cct, 20) << "INVALIDATE" << dendl;
+    if (r < 0) {
+      lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r)
+                << dendl; 
+      return true;
+    }
+    break;
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
   }
-  return 0;
+  return false;
 }
 
-void ObjectMap::invalidate()
-{
-  // TODO: md_lock
+void ObjectMap::Request::invalidate() {
+  CephContext *cct = m_image_ctx.cct;
+  RWLock::WLocker l(m_image_ctx.md_lock);
+
+  lderr(cct) << this << " invalidating object map" << dendl;
+  m_state = STATE_INVALIDATE;
   m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID;
-  int r = cls_client::set_flags(&m_image_ctx.md_ctx,
-                               m_image_ctx.header_oid,
-                               m_image_ctx.flags,
-                                RBD_FLAG_OBJECT_MAP_INVALID);
-  if (r < 0) {
-    lderr(m_image_ctx.cct) << "Failed to invalidate object map: "
-                          << cpp_strerror(r) << dendl;
+
+  librados::ObjectWriteOperation op;
+  cls_client::set_flags(&op, m_image_ctx.flags, RBD_FLAG_OBJECT_MAP_INVALID);
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                        rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+void ObjectMap::ResizeRequest::send() {
+  CephContext *cct = m_image_ctx.cct;
+
+  RWLock::WLocker l(m_image_ctx.object_map_lock);
+  m_num_objs = Striper::get_num_objects(m_image_ctx.layout, m_new_size);
+
+  ldout(cct, 5) << this << " resizing on-disk object map: " << m_num_objs << dendl;
+
+  librados::ObjectWriteOperation op;
+  rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
+  cls_client::object_map_resize(&op, m_num_objs, m_default_object_state);
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
+                                          rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) {
+  CephContext *cct = m_image_ctx.cct;
+
+  ldout(cct, 5) << this << " resizing in-memory object map: " << m_num_objs << dendl;
+  size_t orig_object_map_size = object_map->object_map.size();
+  object_map->object_map.resize(m_num_objs);
+  for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) {
+    object_map->object_map[i] = m_default_object_state;
+  }
+}
+
+void ObjectMap::UpdateRequest::send() {
+  CephContext *cct = m_image_ctx.cct;
+
+  ldout(cct, 20) << this << " updating on-disk object map: ["
+                << m_start_object_no << "," << m_end_object_no << ") = "
+                << static_cast<uint32_t>(m_new_state) << dendl;
+  
+  librados::ObjectWriteOperation op;
+  rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
+  cls_client::object_map_update(&op, m_start_object_no, m_end_object_no,
+                               m_new_state, m_current_state);
+
+  librados::AioCompletion *rados_completion = create_callback_completion();
+  int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
+                                           rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) {
+  CephContext *cct = m_image_ctx.cct;
+
+  ldout(cct, 20) << this << " updating in-memory object map" << dendl;
+  for (uint64_t object_no = m_start_object_no;
+       object_no < MIN(m_end_object_no, object_map->object_map.size());
+       ++object_no) {
+    if (!m_current_state ||
+       object_map->object_map[object_no] == *m_current_state) {
+      object_map->object_map[object_no] = m_new_state;
+    }
   }
 }
 
index da426d18f3b13fffe2c09ba5d57646f47555079c..648043d38f96eaae8f6a2b1ffefecc5da9286feb 100644 (file)
@@ -4,11 +4,19 @@
 #define CEPH_LIBRBD_OBJECT_MAP_H
 
 #include "include/int_types.h"
+#include "include/rados/librados.hpp"
 #include "common/bit_vector.hpp"
+#include "librbd/AsyncRequest.h"
 #include <boost/optional.hpp>
 
+class Context;
+
 namespace librbd {
 
+static const uint8_t OBJECT_NONEXISTENT = 0;
+static const uint8_t OBJECT_EXISTS = 1;
+static const uint8_t OBJECT_PENDING = 2;
+
 class ImageCtx;
 
 class ObjectMap {
@@ -21,16 +29,88 @@ public:
 
   bool object_may_exist(uint64_t object_no) const;
 
-  int refresh();
-  int resize(uint8_t default_object_state);
+  void aio_resize(uint64_t new_size, uint8_t default_object_state,
+                 Context *on_finish);
+  void aio_update(uint64_t object_no, uint8_t new_state,
+                const boost::optional<uint8_t> &current_state,
+                Context *on_finish);
+  void aio_update(uint64_t start_object_no, uint64_t end_object_no,
+                 uint8_t new_state,
+                 const boost::optional<uint8_t> &current_state,
+                 Context *on_finish);
 
-  int update(uint64_t object_no, uint8_t new_state,
-            const boost::optional<uint8_t> &current_state);
-  int update(uint64_t start_object_no, uint64_t end_object_no,
-            uint8_t new_state, const boost::optional<uint8_t> &current_state);
+  int refresh();
 
 private:
 
+  class Request : public AsyncRequest {
+  public:
+    Request(ImageCtx &image_ctx, Context *on_finish)
+      : AsyncRequest(image_ctx, on_finish), m_state(STATE_REQUEST)
+    {
+    }
+
+  protected:
+    virtual bool should_complete(int r);
+    virtual void finish(ObjectMap *object_map) = 0;
+
+  private:
+    /**
+     * <start> ---> STATE_REQUEST ---> <finish>
+     *                   |                ^
+     *                   v                |
+     *            STATE_INVALIDATE -------/
+     */
+    enum State {
+      STATE_REQUEST,
+      STATE_INVALIDATE
+    };
+
+    State m_state;
+
+    void invalidate();
+  }; 
+
+  class ResizeRequest : public Request {
+  public:
+    ResizeRequest(ImageCtx &image_ctx, uint64_t new_size,
+                 uint8_t default_object_state, Context *on_finish)
+      : Request(image_ctx, on_finish), m_num_objs(0), m_new_size(new_size),
+        m_default_object_state(default_object_state)
+    {
+    }
+
+    virtual void send();
+  protected:
+    virtual void finish(ObjectMap *object_map);
+  private:
+    uint64_t m_num_objs;
+    uint64_t m_new_size;
+    uint8_t m_default_object_state;
+  };
+
+  class UpdateRequest : public Request {
+  public:
+    UpdateRequest(ImageCtx &image_ctx, uint64_t start_object_no,
+                 uint64_t end_object_no, uint8_t new_state,
+                  const boost::optional<uint8_t> &current_state,
+                 Context *on_finish)
+      : Request(image_ctx, on_finish), m_start_object_no(start_object_no),
+       m_end_object_no(end_object_no), m_new_state(new_state),
+       m_current_state(current_state)
+    {
+    }
+
+    virtual void send();
+  protected:
+    virtual void finish(ObjectMap *object_map);
+  private:
+    uint64_t m_start_object_no;
+    uint64_t m_end_object_no;
+    uint8_t m_new_state;
+    boost::optional<uint8_t> m_current_state;
+  };
+
   ImageCtx &m_image_ctx;
 
   ceph::BitVector<2> object_map;
index a200d183e64dc0b602d2175fdeb3883e0bb65d3e..f3966362a20da76f18b671cb4fb359e2aade0009 100644 (file)
@@ -17,7 +17,9 @@
 
 #include "librbd/AioCompletion.h"
 #include "librbd/AioRequest.h"
-#include "librbd/AsyncObjectThrottle.h"
+#include "librbd/AsyncFlattenRequest.h"
+#include "librbd/AsyncResizeRequest.h"
+#include "librbd/AsyncTrimRequest.h"
 #include "librbd/CopyupRequest.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageWatcher.h"
@@ -30,8 +32,6 @@
 #include "librados/snap_set_diff.h"
 
 #include <boost/bind.hpp>
-#include <boost/lambda/bind.hpp>
-#include <boost/lambda/construct.hpp>
 #include <boost/scope_exit.hpp>
 #include "include/assert.h"
 
@@ -167,30 +167,15 @@ namespace librbd {
     assert(!ictx->image_watcher->is_lock_supported() ||
           ictx->image_watcher->is_lock_owner());
 
-    Mutex my_lock("librbd::trim_image::my_lock");
-    Cond cond;
-    bool done;
-    int ret;
-
-    CephContext *cct = ictx->cct;
-    Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret);
-    ret = async_trim_image(ictx, ctx, ictx->size, newsize, prog_ctx);
-    if (ret < 0) {
-      lderr(cct) << "warning: failed to remove object(s): "
-                << cpp_strerror(ret) << dendl;
-      delete ctx;
-      return;
-    }
-
-    my_lock.Lock();
-    while (!done) {
-      cond.Wait(my_lock);
-    }
-    my_lock.Unlock();
+    C_SaferCond *ctx = new C_SaferCond();
+    AsyncTrimRequest *req = new AsyncTrimRequest(*ictx, ctx, ictx->size,
+                                                newsize, prog_ctx);
+    req->send();
 
-    if (ret < 0) {
-      lderr(cct) << "warning: failed to remove some object(s): "
-                << cpp_strerror(ret) << dendl;
+    int r = ctx->wait();
+    if (r < 0) {
+      lderr(ictx->cct) << "warning: failed to remove some object(s): "
+                      << cpp_strerror(r) << dendl;
     }
   }
 
@@ -1606,9 +1591,7 @@ reprotect_and_return_err:
 
     int r;
     do {
-      Mutex my_lock("librbd::resize::my_lock");
-      Cond cond;
-      bool done;
+      C_SaferCond *ctx;
       {
        RWLock::RLocker l(ictx->owner_lock);
        while (ictx->image_watcher->is_lock_supported()) {
@@ -1626,7 +1609,7 @@ reprotect_and_return_err:
          ldout(ictx->cct, 5) << "resize timed out notifying lock owner" << dendl;
        }
 
-       Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r);
+       ctx = new C_SaferCond();
        r = async_resize(ictx, ctx, size, prog_ctx);
        if (r < 0) {
          delete ctx;
@@ -1634,40 +1617,17 @@ reprotect_and_return_err:
        }
       }
 
-      my_lock.Lock();
-      while (!done) {
-       cond.Wait(my_lock);
-      }
-      my_lock.Unlock();
-
+      r = ctx->wait();
       if (r == -ERESTART) {
        ldout(ictx->cct, 5) << "resize interrupted: restarting" << dendl;
       }
     } while (r == -ERESTART);
 
+    ictx->perfcounter->inc(l_librbd_resize);
     notify_change(ictx->md_ctx, ictx->header_oid, ictx);
     ldout(cct, 2) << "resize finished" << dendl;
     return r;
   }
-
-  class AsyncResizeFinishContext : public Context {
-  public:
-    AsyncResizeFinishContext(ImageCtx *ictx, Context *ctx)
-      : m_ictx(ictx), m_ctx(ctx)
-    {
-    }
-
-    virtual void finish(int r) {
-      ldout(m_ictx->cct, 2) << "async_resize finished" << dendl;
-      m_ictx->perfcounter->inc(l_librbd_resize);
-      m_ctx->complete(r);
-    }
-
-  private:
-    ImageCtx *m_ictx;
-    Context *m_ctx;
-  };
-
   int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size,
                   ProgressContext &prog_ctx)
   {
@@ -1705,297 +1665,17 @@ reprotect_and_return_err:
       }
     }
 
-    AsyncResizeFinishContext *finish_ctx =
-      new AsyncResizeFinishContext(ictx, ctx);
-    r = async_resize_helper(ictx, finish_ctx, original_size, size, prog_ctx);
-    if (r < 0) {
-      delete ctx;
-      return r;
-    }
+    async_resize_helper(ictx, ctx, original_size, size, prog_ctx);
     return 0;
   }
 
-  class AsyncResizeHelperFinishContext : public Context {
-  public:
-    AsyncResizeHelperFinishContext(ImageCtx *ictx, Context *ctx,
-                                  uint64_t original_size, uint64_t new_size)
-      : m_ictx(ictx), m_ctx(ctx), m_original_size(original_size),
-       m_new_size(new_size)
-    {
-    }
-
-    virtual void finish(int r) {
-      BOOST_SCOPE_EXIT((m_ctx) (m_ictx) (m_new_size) (m_original_size) (&r)) {
-       ldout(m_ictx->cct, 2) << "async_resize_helper finished ("
-                             << r << ")" << dendl;
-
-       if (r < 0) {
-         RWLock::WLocker l(m_ictx->md_lock);
-         if (m_ictx->size == m_new_size) {
-           m_ictx->size = m_original_size;
-         }
-         m_ctx->complete(r);
-       }
-      } BOOST_SCOPE_EXIT_END
-
-      if (r < 0) {
-       return;
-      }
-
-      RWLock::RLocker l(m_ictx->owner_lock);
-      if (m_ictx->image_watcher->is_lock_supported() &&
-          !m_ictx->image_watcher->is_lock_owner()) {
-        r = -ERESTART;
-        return;
-      }
-
-      RWLock::WLocker l2(m_ictx->md_lock);
-      m_ictx->size = m_new_size;
-
-      librados::ObjectWriteOperation op;
-      if (m_ictx->old_format) {
-       // rewrite header
-       bufferlist bl;
-       m_ictx->header.image_size = m_new_size;
-       bl.append((const char *)&m_ictx->header, sizeof(m_ictx->header));
-       op.write(0, bl);
-      } else {
-        if (m_ictx->image_watcher->is_lock_supported()) {
-         m_ictx->image_watcher->assert_header_locked(&op);
-        }
-       cls_client::set_size(&op, m_new_size);
-      }
-
-      librados::AioCompletion *rados_completion =
-       librados::Rados::aio_create_completion(m_ctx, NULL, rados_ctx_cb);
-      r = m_ictx->md_ctx.aio_operate(m_ictx->header_oid, rados_completion, &op);
-      rados_completion->release();
-      if (r < 0) {
-       lderr(m_ictx->cct) << "error writing header: " << cpp_strerror(r)
-                          << dendl;
-       return;
-      }
-
-      if (m_ictx->object_map != NULL) {
-       m_ictx->size = m_new_size;
-       m_ictx->object_map->resize(OBJECT_NONEXISTENT);
-      }
-    }
-
-  private:
-    ImageCtx *m_ictx;
-    Context *m_ctx;
-    uint64_t m_original_size;
-    uint64_t m_new_size;
-  };
-
-  int async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
-                         uint64_t new_size, ProgressContext& prog_ctx)
+  void async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
+                          uint64_t new_size, ProgressContext& prog_ctx)
   {
-    CephContext *cct = ictx->cct;
-    if (original_size == new_size) {
-      ldout(cct, 2) << "no change in size (" << original_size << " -> "
-                   << new_size << ")" << dendl;
-      ctx->complete(0);
-      return 0;
-    }
-
-    AsyncResizeHelperFinishContext *finish_ctx =
-      new AsyncResizeHelperFinishContext(ictx, ctx, original_size, new_size);
-    if (new_size > original_size) {
-      ldout(cct, 2) << "expanding image " << original_size << " -> "
-                   << new_size << dendl;
-      finish_ctx->complete(0);
-    } else {
-      ldout(cct, 2) << "shrinking image " << original_size << " -> "
-                   << new_size << dendl;
-
-      {
-        // update in-memory size to clip concurrent IO operations
-        RWLock::WLocker l(ictx->md_lock);
-        ictx->size = new_size;
-      }
-
-      int r = async_trim_image(ictx, finish_ctx, original_size, new_size,
-                              prog_ctx);
-      if (r < 0) {
-       delete finish_ctx;
-       return r;
-      }
-    }
-    return 0;
-  }
-
-  class AsyncTrimObjectContext : public C_AsyncObjectThrottle {
-  public:
-    AsyncTrimObjectContext(AsyncObjectThrottle &throttle, ImageCtx *ictx,
-        uint64_t object_no)
-      : C_AsyncObjectThrottle(throttle), m_ictx(ictx), m_object_no(object_no)
-    {
-    }
-
-    virtual int send() {
-      {
-       RWLock::RLocker l(m_ictx->md_lock);
-       if (m_ictx->object_map != NULL &&
-           !m_ictx->object_map->object_may_exist(m_object_no)) {
-         return 1;
-       }
-      }
-
-      RWLock::RLocker l(m_ictx->owner_lock);
-      if (m_ictx->image_watcher->is_lock_supported() &&
-          !m_ictx->image_watcher->is_lock_owner()) {
-        return -ERESTART;
-      }
-
-      string oid = m_ictx->get_object_name(m_object_no);
-      librados::AioCompletion *rados_completion =
-       librados::Rados::aio_create_completion(this, NULL, rados_ctx_cb);
-      m_ictx->data_ctx.aio_remove(oid, rados_completion);
-      rados_completion->release();
-      return 0;
-    }
-
-  private:
-    ImageCtx *m_ictx;
-    uint64_t m_object_no;
-  };
-
-  class AsyncTrimFinishContext : public Context {
-  public:
-
-    AsyncTrimFinishContext(ImageCtx *ictx, Context *ctx, uint64_t delete_start,
-         uint64_t num_objects, uint64_t delete_offset,
-         uint64_t new_size)
-      : m_ictx(ictx), m_ctx(ctx), m_delete_start(delete_start),
-       m_num_objects(num_objects), m_delete_offset(delete_offset),
-       m_new_size(new_size)
-    {
-    }
-
-    virtual void finish(int r) {
-      if (r < 0) {
-       m_ctx->complete(r);
-       return;
-      }
-
-      RWLock::RLocker l(m_ictx->owner_lock);
-      if (m_ictx->image_watcher->is_lock_supported() &&
-          !m_ictx->image_watcher->is_lock_owner()) {
-       m_ctx->complete(-ERESTART);
-       return;
-      }
-
-      RWLock::RLocker l2(m_ictx->md_lock);
-      if (m_ictx->object_map != NULL) {
-       m_ictx->object_map->update(m_delete_start, m_num_objects,
-                                  OBJECT_NONEXISTENT, OBJECT_PENDING);
-      }
-
-      if (m_delete_offset <= m_new_size) {
-       m_ctx->complete(r);
-       return;
-      }
-
-      // discard the weird boundary, if any
-      vector<ObjectExtent> extents;
-      Striper::file_to_extents(m_ictx->cct, m_ictx->format_string,
-             &m_ictx->layout, m_new_size,
-             m_delete_offset - m_new_size, 0, extents);
-
-      ContextCompletion *completion = new ContextCompletion(m_ctx, true);
-      for (vector<ObjectExtent>::iterator p = extents.begin();
-          p != extents.end(); ++p) {
-       ldout(m_ictx->cct, 20) << " ex " << *p << dendl;
-       Context *req_comp = new C_ContextCompletion(*completion);
-       librados::AioCompletion *rados_completion =
-         librados::Rados::aio_create_completion(req_comp, NULL, rados_ctx_cb);
-
-       bool flag_nonexistent = false;
-       if (p->offset == 0) {
-         flag_nonexistent = true;
-         if (m_ictx->object_map != NULL) {
-           m_ictx->object_map->update(p->objectno, OBJECT_PENDING,
-                                      OBJECT_EXISTS);
-         }
-         m_ictx->data_ctx.aio_remove(p->oid.name, rados_completion);
-       } else {
-         if (m_ictx->object_map != NULL) {
-           m_ictx->object_map->update(p->objectno, OBJECT_EXISTS,
-                                      boost::optional<uint8_t>());
-         }
-         librados::ObjectWriteOperation op;
-         op.truncate(p->offset);
-         m_ictx->data_ctx.aio_operate(p->oid.name, rados_completion, &op);
-       }
-       rados_completion->release();
-
-       if (flag_nonexistent && m_ictx->object_map != NULL) {
-         m_ictx->object_map->update(p->objectno, OBJECT_NONEXISTENT,
-                                    OBJECT_PENDING);
-       }
-      }
-      completion->finish_adding_requests();
-    }
-
-  private:
-    ImageCtx *m_ictx;
-    Context *m_ctx;
-    uint64_t m_delete_start;
-    uint64_t m_num_objects;
-    uint64_t m_delete_offset;
-    uint64_t m_new_size;
-  };
-
-  int async_trim_image(ImageCtx *ictx, Context *ctx, uint64_t original_size,
-                      uint64_t new_size, ProgressContext& prog_ctx)
-  {
-    CephContext *cct = (CephContext *)ictx->data_ctx.cct();
-
-    uint64_t period = ictx->get_stripe_period();
-    uint64_t new_num_periods = ((new_size + period - 1) / period);
-    uint64_t delete_off = MIN(new_num_periods * period, original_size);
-    // first object we can delete free and clear
-    uint64_t delete_start = new_num_periods * ictx->get_stripe_count();
-    uint64_t num_objects = Striper::get_num_objects(ictx->layout, original_size);
-
-    ldout(cct, 10) << "trim_image " << original_size << " -> " << new_size
-                  << " periods " << new_num_periods
-                  << " discard to offset " << delete_off
-                  << " delete objects " << delete_start
-                  << " to " << (num_objects-1)
-                  << dendl;
-
-    AsyncTrimFinishContext *finish_ctx =
-      new AsyncTrimFinishContext(ictx, ctx, delete_start, num_objects,
-                                delete_off, new_size);
-    if (delete_start < num_objects) {
-      ldout(cct, 2) << "trim_image objects " << delete_start << " to "
-                   << (num_objects - 1) << dendl;
-
-      {
-       RWLock::RLocker l(ictx->md_lock);
-       if (ictx->object_map != NULL) {
-         ictx->object_map->update(delete_start, num_objects, OBJECT_PENDING,
-                                  OBJECT_EXISTS);
-       }
-      }
-
-      AsyncObjectThrottle::ContextFactory context_factory(
-        boost::lambda::bind(boost::lambda::new_ptr<AsyncTrimObjectContext>(),
-          boost::lambda::_1, ictx, boost::lambda::_2));
-      AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
-        context_factory, finish_ctx, prog_ctx, delete_start, num_objects);
-      int r = throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
-      if (r < 0) {
-       delete throttle;
-       return r;
-      }
-    } else {
-      finish_ctx->complete(0);
-    }
-    return 0;
+    assert(ictx->owner_lock.is_locked());
+    AsyncResizeRequest *req = new AsyncResizeRequest(*ictx, ctx, original_size,
+                                                    new_size, prog_ctx);
+    req->send();
   }
 
   int snap_list(ImageCtx *ictx, vector<snap_info_t>& snaps)
@@ -2403,25 +2083,12 @@ reprotect_and_return_err:
       }
     }
 
-    Mutex my_lock("librbd::snap_rollback::my_lock");
-    Cond cond;
-    bool done;
-    Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r);
-
     ldout(cct, 2) << "resizing to snapshot size..." << dendl;
     NoOpProgressContext no_op;
-    r = async_resize_helper(ictx, ctx, original_size, new_size, no_op);
-    if (r < 0) {
-      delete ctx;
-      return r;
-    }
-
-    my_lock.Lock();
-    while (!done) {
-      cond.Wait(my_lock);
-    }
-    my_lock.Unlock();
+    C_SaferCond *ctx = new C_SaferCond();
+    async_resize_helper(ictx, ctx, original_size, new_size, no_op);
 
+    r = ctx->wait();
     if (r < 0) {
       lderr(cct) << "Error resizing to snapshot size: "
                 << cpp_strerror(r) << dendl;
@@ -2707,7 +2374,6 @@ reprotect_and_return_err:
       ictx->shutdown_cache(); // implicitly flushes
     } else {
       flush(ictx);
-      ictx->wait_for_pending_aio();
     }
 
     if (ictx->copyup_finisher != NULL) {
@@ -2738,140 +2404,6 @@ reprotect_and_return_err:
     delete ictx;
   }
 
-  class AsyncFlattenObjectContext : public C_AsyncObjectThrottle {
-  public:
-    AsyncFlattenObjectContext(AsyncObjectThrottle &throttle, ImageCtx *ictx,
-                             uint64_t object_size, ::SnapContext snapc,
-                             uint64_t object_no)
-      : C_AsyncObjectThrottle(throttle), m_ictx(ictx),
-       m_object_size(object_size), m_snapc(snapc), m_object_no(object_no)
-    {
-    }
-
-    virtual int send() {
-      int r = ictx_check(m_ictx);
-      if (r < 0) {
-        return r;
-      }
-
-      RWLock::RLocker l(m_ictx->owner_lock);
-      if (m_ictx->image_watcher->is_lock_supported() &&
-          !m_ictx->image_watcher->is_lock_owner()) {
-       return -ERESTART;
-      }
-
-      RWLock::RLocker l2(m_ictx->md_lock);
-      uint64_t overlap;
-      {
-        RWLock::RLocker l3(m_ictx->parent_lock);
-        // stop early if the parent went away - it just means
-        // another flatten finished first, so this one is useless.
-        if (!m_ictx->parent) {
-          return 1;
-        }
-
-       // resize might have occurred while flatten is running
-       overlap = min(m_ictx->size, m_ictx->parent_md.overlap);
-      }
-
-      // map child object onto the parent
-      vector<pair<uint64_t,uint64_t> > objectx;
-      Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0,
-            m_object_size, objectx);
-      uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
-      assert(object_overlap <= m_object_size);
-      if (object_overlap == 0) {
-       // resize shrunk image while flattening
-       return 1;
-      }
-
-      bufferlist bl;
-      string oid = m_ictx->get_object_name(m_object_no);
-      AioWrite *req = new AioWrite(m_ictx, oid, m_object_no, 0, objectx,
-                                  object_overlap, bl, m_snapc, CEPH_NOSNAP,
-                                  this);
-      r = req->send();
-      if (r < 0) {
-       lderr(m_ictx->cct) << "failed to flatten object " << oid << dendl;
-       delete req;
-       return r;
-      }
-      return 0;
-    }
-
-  private:
-    ImageCtx *m_ictx;
-    uint64_t m_object_size;
-    ::SnapContext m_snapc;
-    uint64_t m_object_no;
-
-  };
-
-  class AsyncFlattenFinishContext : public Context {
-  public:
-    AsyncFlattenFinishContext(ImageCtx *ictx, Context *ctx,
-                             uint64_t overlap_objects)
-      : m_ictx(ictx), m_ctx(ctx), m_overlap_objects(overlap_objects)
-    {
-    }
-
-    virtual void finish(int r) {
-      BOOST_SCOPE_EXIT((&m_ctx) (&r)) {
-       m_ctx->complete(r);
-      } BOOST_SCOPE_EXIT_END
-
-      CephContext *cct = m_ictx->cct;
-      if (r < 0) {
-        lderr(cct) << "failed to flatten at least one object: "
-                  << cpp_strerror(r) << dendl;
-        return;
-      }
-
-      RWLock::RLocker l(m_ictx->owner_lock);
-      if (m_ictx->image_watcher->is_lock_supported() &&
-          !m_ictx->image_watcher->is_lock_owner()) {
-       r = -ERESTART;
-        return;
-      }
-
-      // remove parent from this (base) image
-      librados::ObjectWriteOperation op;
-      if (m_ictx->image_watcher->is_lock_supported()) {
-       m_ictx->image_watcher->assert_header_locked(&op);
-      }
-      cls_client::remove_parent(&op);
-      r = m_ictx->md_ctx.operate(m_ictx->header_oid, &op);
-      if (r < 0) {
-        lderr(cct) << "error removing parent" << dendl;
-        return;
-      }
-
-      // and if there are no snaps, remove from the children object as well
-      // (if snapshots remain, they have their own parent info, and the child
-      // will be removed when the last snap goes away)
-      m_ictx->snap_lock.get_read();
-      if (m_ictx->snaps.empty()) {
-        ldout(cct, 2) << "removing child from children list..." << dendl;
-        int r = cls_client::remove_child(&m_ictx->md_ctx, RBD_CHILDREN,
-                                        m_ictx->parent_md.spec, m_ictx->id);
-        if (r < 0) {
-         lderr(cct) << "error removing child from children list" << dendl;
-         m_ictx->snap_lock.put_read();
-         return;
-        }
-      }
-      m_ictx->snap_lock.put_read();
-
-      ldout(cct, 20) << "finished flattening" << dendl;
-      return;
-    }
-
-  private:
-    ImageCtx *m_ictx;
-    Context *m_ctx;
-    uint64_t m_overlap_objects;
-  };
-
   // 'flatten' child image by copying all parent's blocks
   int flatten(ImageCtx *ictx, ProgressContext &prog_ctx)
   {
@@ -2884,9 +2416,7 @@ reprotect_and_return_err:
 
     int r;
     do {
-      Mutex my_lock("librbd::flatten:my_lock");
-      Cond cond;
-      bool done;
+      C_SaferCond *ctx;
       {
         RWLock::RLocker l(ictx->owner_lock);
         while (ictx->image_watcher->is_lock_supported()) {
@@ -2904,7 +2434,7 @@ reprotect_and_return_err:
           ldout(ictx->cct, 5) << "flatten timed out notifying lock owner" << dendl;
         }
 
-        Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r);
+       ctx = new C_SaferCond();
         r = async_flatten(ictx, ctx, prog_ctx);
         if (r < 0) {
          delete ctx;
@@ -2912,12 +2442,7 @@ reprotect_and_return_err:
         }
       }
 
-      my_lock.Lock();
-      while (!done) {
-        cond.Wait(my_lock);
-      }
-      my_lock.Unlock();
-
+      r = ctx->wait();
       if (r == -ERESTART) {
        ldout(ictx->cct, 5) << "flatten interrupted: restarting" << dendl;
       }
@@ -2977,20 +2502,10 @@ reprotect_and_return_err:
       overlap_objects = Striper::get_num_objects(ictx->layout, overlap); 
     }
 
-    AsyncObjectThrottle::ContextFactory context_factory(
-      boost::lambda::bind(boost::lambda::new_ptr<AsyncFlattenObjectContext>(),
-        boost::lambda::_1, ictx, object_size, snapc,
-        boost::lambda::_2));
-    AsyncFlattenFinishContext *finish_ctx =
-      new AsyncFlattenFinishContext(ictx, ctx, overlap_objects);
-    AsyncObjectThrottle *throttle = new AsyncObjectThrottle(
-      context_factory, finish_ctx, prog_ctx, 0, overlap_objects);
-    r = throttle->start_ops(cct->_conf->rbd_concurrent_management_ops);
-    if (r < 0) {
-      delete throttle;
-      return r;
-    }
-
+    AsyncFlattenRequest *req =
+      new AsyncFlattenRequest(*ictx, ctx, object_size, overlap_objects,
+                             snapc, prog_ctx);
+    req->send();
     return 0;
   }
 
@@ -3635,6 +3150,7 @@ reprotect_and_return_err:
       r = ictx->flush_cache();
     } else {
       r = ictx->data_ctx.aio_flush();
+      ictx->wait_for_pending_aio();
     }
 
     if (r)
@@ -3724,17 +3240,6 @@ reprotect_and_return_err:
        bl.append(buf + q->first, q->second);
       }
 
-      {
-       RWLock::RLocker l(ictx->md_lock);
-       if (ictx->object_map != NULL) {
-         r = ictx->object_map->update(p->objectno, OBJECT_EXISTS,
-                                      boost::optional<uint8_t>());
-         if (r < 0) {
-           goto done;
-         }
-       }
-      }
-
       C_AioWrite *req_comp = new C_AioWrite(cct, c);
       if (ictx->object_cacher) {
        c->add_request();
@@ -3754,8 +3259,10 @@ reprotect_and_return_err:
 
        req->set_op_flags(op_flags);
        r = req->send();
-       if (r < 0)
+       if (r < 0) {
+         req->complete(r);
          goto done;
+       }
       }
     }
   done:
@@ -3764,8 +3271,6 @@ reprotect_and_return_err:
 
     ictx->perfcounter->inc(l_librbd_aio_wr);
     ictx->perfcounter->inc(l_librbd_aio_wr_bytes, mylen);
-
-    /* FIXME: cleanup all the allocated stuff */
     return r;
   }
 
@@ -3834,15 +3339,9 @@ reprotect_and_return_err:
        object_overlap = ictx->prune_parent_extents(objectx, overlap);
       }
 
-      RWLock::RLocker l(ictx->md_lock);
-      bool flag_nonexistent = false;
       if (p->offset == 0 && p->length == ictx->layout.fl_object_size) {
        req = new AioRemove(ictx, p->oid.name, p->objectno, objectx, object_overlap,
                            snapc, snap_id, req_comp);
-       if (!req->has_parent() && ictx->object_map != NULL) {
-          ictx->object_map->update(p->objectno, OBJECT_PENDING, OBJECT_EXISTS);
-         flag_nonexistent = true;
-       }
       } else if (p->offset + p->length == ictx->layout.fl_object_size) {
        req = new AioTruncate(ictx, p->oid.name, p->objectno, p->offset, objectx, object_overlap,
                              snapc, snap_id, req_comp);
@@ -3852,18 +3351,10 @@ reprotect_and_return_err:
                          snapc, snap_id, req_comp);
       }
 
-      if (!flag_nonexistent && ictx->object_map != NULL) {
-       ictx->object_map->update(p->objectno, OBJECT_EXISTS,
-                                boost::optional<uint8_t>());
-      }
-
       r = req->send();
-      if (r < 0)
+      if (r < 0) {
+       req->complete(r);
        goto done;
-
-      if (flag_nonexistent && ictx->object_map != NULL) {
-       ictx->object_map->update(p->objectno, OBJECT_NONEXISTENT,
-                                OBJECT_PENDING);
       }
     }
     r = 0;
@@ -3878,8 +3369,6 @@ reprotect_and_return_err:
 
     ictx->perfcounter->inc(l_librbd_aio_discard);
     ictx->perfcounter->inc(l_librbd_aio_discard_bytes, len);
-
-    /* FIXME: cleanup all the allocated stuff */
     return r;
   }
 
index 48ddcfcb3e44dbbe3377f9f05d1401fe1c93603c..9984b4b663daa0d0c74a8e27f8b4cc11b2a2607c 100644 (file)
@@ -73,10 +73,6 @@ namespace librbd {
     }
   };
 
-  static const uint8_t OBJECT_NONEXISTENT = 0;
-  static const uint8_t OBJECT_EXISTS = 1;
-  static const uint8_t OBJECT_PENDING = 2;
-
   const std::string id_obj_name(const std::string &name);
   const std::string header_name(const std::string &image_id);
   const std::string old_header_name(const std::string &image_name);
@@ -197,10 +193,8 @@ namespace librbd {
   int async_flatten(ImageCtx *ictx, Context *ctx, ProgressContext &prog_ctx);
   int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size,
                   ProgressContext &prog_ctx);
-  int async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
-                         uint64_t new_size, ProgressContext& prog_ctx);
-  int async_trim_image(ImageCtx *ictx, Context *ctx, uint64_t original_size,
-                      uint64_t new_size, ProgressContext& prog_ctx);
+  void async_resize_helper(ImageCtx *ictx, Context *ctx, uint64_t original_size,
+                          uint64_t new_size, ProgressContext& prog_ctx);
 
   int aio_write(ImageCtx *ictx, uint64_t off, size_t len, const char *buf,
                AioCompletion *c, int op_flags);
index f243cf2627d97842e644713cc9902dfdc2b8b6dc..3b6561b96af678c27fe1f99f6cc39e2a24fdd4c5 100644 (file)
@@ -71,10 +71,12 @@ class ObjectCacher {
     bufferlist bl;
     utime_t mtime;
     int flags;
-    OSDWrite(const SnapContext& sc, bufferlist& b, utime_t mt, int f) : snapc(sc), bl(b), mtime(mt), flags(f) {}
+    OSDWrite(const SnapContext& sc, const bufferlist& b, utime_t mt, int f)
+      : snapc(sc), bl(b), mtime(mt), flags(f) {}
   };
 
-  OSDWrite *prepare_write(const SnapContext& sc, bufferlist &b, utime_t mt, int f) { 
+  OSDWrite *prepare_write(const SnapContext& sc, const bufferlist &b,
+                         utime_t mt, int f) { 
     return new OSDWrite(sc, b, mt, f); 
   }
 
index 3642efe2f876cc5d4d9f962c7eb5bf3caa0db1c7..fb124189c005aa9f403affeeaf3b831e693e6184 100644 (file)
@@ -1041,7 +1041,9 @@ TEST_F(TestClsRbd, flags)
   ASSERT_EQ(0, get_flags(&ioctx, oid, CEPH_NOSNAP, &flags));
   ASSERT_EQ(0U, flags);
 
-  ASSERT_EQ(0, set_flags(&ioctx, oid, 3, 2));
+  librados::ObjectWriteOperation op1;
+  set_flags(&op1, 3, 2);
+  ASSERT_EQ(0, ioctx.operate(oid, &op1));
   ASSERT_EQ(0, get_flags(&ioctx, oid, CEPH_NOSNAP, &flags));
   ASSERT_EQ(2U, flags);
 
@@ -1049,7 +1051,9 @@ TEST_F(TestClsRbd, flags)
   ASSERT_EQ(-ENOENT, get_flags(&ioctx, oid, snap_id, &flags));
   ASSERT_EQ(0, snapshot_add(&ioctx, oid, snap_id, "snap"));
 
-  ASSERT_EQ(0, set_flags(&ioctx, oid, 31, 4));
+  librados::ObjectWriteOperation op2;
+  set_flags(&op2, 31, 4);
+  ASSERT_EQ(0, ioctx.operate(oid, &op2));
   ASSERT_EQ(0, get_flags(&ioctx, oid, CEPH_NOSNAP, &flags));
   ASSERT_EQ(6U, flags);