]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: batch ObjectMap updations upon trim 11510/head
authorVenky Shankar <vshankar@redhat.com>
Sat, 15 Oct 2016 11:48:30 +0000 (17:18 +0530)
committerVenky Shankar <vshankar@redhat.com>
Thu, 3 Nov 2016 03:23:55 +0000 (08:53 +0530)
Shrinking a clone which has snapshots and does not share
majority of objects with its parent (i.e., there are less
objects to be copied up) involves huge number of object
map updates -- two (pre, post) per object. This results
in lots of requests to be send to OSDs especially when
trimming a gigantus image. This situation can be optimized
by sending batch ObjectMap updates for an object range
thereby significantly cutting down OSD traffic resulting
in faster trim times.

Fixes: http://tracker.ceph.com/issues/17356
Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/librbd/AioObjectRequest.h
src/librbd/operation/TrimRequest.cc
src/librbd/operation/TrimRequest.h

index 2c7a14866cf154691407da367792b7e14668c8c3..9ae2a84327c0dacc921a520f6b9e7bab37ce0692 100644 (file)
@@ -330,11 +330,14 @@ private:
 
 class AioObjectTrim : public AbstractAioObjectWrite {
 public:
+  // we'd need to only conditionally specify if a post object map
+  // update is needed. pre update is decided as usual (by checking
+  // the state of the object in the map).
   AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
-                const ::SnapContext &snapc, Context *completion)
+                const ::SnapContext &snapc, Context *completion,
+                bool post_object_map_update)
     : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
-                             true) {
-  }
+                             true), m_post_object_map_update(post_object_map_update) { }
 
 protected:
   virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
@@ -350,8 +353,11 @@ protected:
   }
 
   virtual bool post_object_map_update() {
-    return true;
+    return m_post_object_map_update;
   }
+
+private:
+  bool m_post_object_map_update;
 };
 
 class AioObjectTruncate : public AbstractAioObjectWrite {
index 3992fb75e21c26dd4285c90876f12cacbd2cb0d8..f0ba3209ffa69f7d90558cb9e34f1ca11da36b01 100644 (file)
@@ -46,7 +46,7 @@ public:
     ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
 
     AioObjectRequest<> *req = new AioObjectTrim(&image_ctx, oid, m_object_no,
-                                                m_snapc, this);
+                                                m_snapc, this, false);
     req->send();
     return 0;
   }
@@ -131,8 +131,18 @@ bool TrimRequest<I>::should_complete(int r)
 
   RWLock::RLocker owner_lock(image_ctx.owner_lock);
   switch (m_state) {
+  case STATE_PRE_COPYUP:
+    ldout(cct, 5) << " PRE_COPYUP" << dendl;
+    send_copyup_objects();
+    break;
+
   case STATE_COPYUP_OBJECTS:
     ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
+    send_post_copyup();
+    break;
+
+  case STATE_POST_COPYUP:
+    ldout(cct, 5) << " POST_COPYUP" << dendl;
     send_pre_remove();
     break;
 
@@ -170,58 +180,33 @@ bool TrimRequest<I>::should_complete(int r)
 
 template <typename I>
 void TrimRequest<I>::send() {
-  send_copyup_objects();
+  send_pre_copyup();
 }
 
-template <typename I>
+template<typename I>
 void TrimRequest<I>::send_copyup_objects() {
   I &image_ctx = this->m_image_ctx;
   assert(image_ctx.owner_lock.is_locked());
-  assert(image_ctx.exclusive_lock == nullptr ||
-         image_ctx.exclusive_lock->is_lock_owner());
 
-  if (m_delete_start >= m_num_objects) {
-    send_clean_boundary();
-    return;
-  }
+  ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
+                          << " start object=" << m_copyup_start << ", "
+                          << " end object=" << m_copyup_end << dendl;
+  m_state = STATE_COPYUP_OBJECTS;
 
   ::SnapContext snapc;
-  bool has_snapshots;
-  uint64_t parent_overlap;
   {
     RWLock::RLocker snap_locker(image_ctx.snap_lock);
     RWLock::RLocker parent_locker(image_ctx.parent_lock);
-
     snapc = image_ctx.snapc;
-    has_snapshots = !image_ctx.snaps.empty();
-    int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
-    assert(r == 0);
   }
 
-  // copyup is only required for portion of image that overlaps parent
-  uint64_t copyup_end = Striper::get_num_objects(image_ctx.layout,
-                                                 parent_overlap);
-  // TODO: protect against concurrent shrink and snap create?
-  if (copyup_end <= m_delete_start || !has_snapshots) {
-    send_pre_remove();
-    return;
-  }
-
-  uint64_t copyup_start = m_delete_start;
-  m_delete_start = copyup_end;
-
-  ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
-                           << " start object=" << copyup_start << ", "
-                           << " end object=" << copyup_end << dendl;
-  m_state = STATE_COPYUP_OBJECTS;
-
   Context *ctx = this->create_callback_context();
   typename AsyncObjectThrottle<I>::ContextFactory context_factory(
     boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject<I> >(),
       boost::lambda::_1, &image_ctx, snapc, boost::lambda::_2));
   AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
-    this, image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
-    copyup_end);
+    this, image_ctx, context_factory, ctx, &m_prog_ctx, m_copyup_start,
+    m_copyup_end);
   throttle->start_ops(image_ctx.concurrent_management_ops);
 }
 
@@ -245,6 +230,68 @@ void TrimRequest<I>::send_remove_objects() {
   throttle->start_ops(image_ctx.concurrent_management_ops);
 }
 
+template<typename I>
+void TrimRequest<I>::send_pre_copyup() {
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.owner_lock.is_locked());
+
+  if (m_delete_start >= m_num_objects) {
+    send_clean_boundary();
+    return;
+  }
+
+  bool has_snapshots;
+  uint64_t parent_overlap;
+  {
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    RWLock::RLocker parent_locker(image_ctx.parent_lock);
+
+    has_snapshots = !image_ctx.snaps.empty();
+    int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
+    assert(r == 0);
+  }
+
+  // copyup is only required for portion of image that overlaps parent
+  m_copyup_end = Striper::get_num_objects(image_ctx.layout, parent_overlap);
+
+  // TODO: protect against concurrent shrink and snap create?
+  // skip to remove if no copyup is required.
+  if (m_copyup_end <= m_delete_start || !has_snapshots) {
+    send_pre_remove();
+    return;
+  }
+
+  m_copyup_start = m_delete_start;
+  m_delete_start = m_copyup_end;
+
+  bool copyup_objects = false;
+  {
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    if (image_ctx.object_map == nullptr) {
+      copyup_objects = true;
+    } else {
+      ldout(image_ctx.cct, 5) << this << " send_pre_copyup: "
+                              << " copyup_start=" << m_copyup_start
+                              << " copyup_end=" << m_copyup_end << dendl;
+      m_state = STATE_PRE_COPYUP;
+
+      assert(image_ctx.exclusive_lock->is_lock_owner());
+
+      Context *ctx = this->create_callback_context();
+      RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
+      if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end,
+                                            OBJECT_PENDING, OBJECT_EXISTS, ctx)) {
+        delete ctx;
+        copyup_objects = true;
+      }
+    }
+  }
+
+  if (copyup_objects) {
+    send_copyup_objects();
+  }
+}
+
 template <typename I>
 void TrimRequest<I>::send_pre_remove() {
   I &image_ctx = this->m_image_ctx;
@@ -286,6 +333,39 @@ void TrimRequest<I>::send_pre_remove() {
   }
 }
 
+template<typename I>
+void TrimRequest<I>::send_post_copyup() {
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.owner_lock.is_locked());
+
+  bool pre_remove_objects = false;
+  {
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    if (image_ctx.object_map == nullptr) {
+      pre_remove_objects = true;
+    } else {
+      ldout(image_ctx.cct, 5) << this << " send_post_copyup:"
+                              << " copyup_start=" << m_copyup_start
+                              << " copyup_end=" << m_copyup_end << dendl;
+      m_state = STATE_POST_COPYUP;
+
+      assert(image_ctx.exclusive_lock->is_lock_owner());
+
+      Context *ctx = this->create_callback_context();
+      RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
+      if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end,
+                                            OBJECT_NONEXISTENT, OBJECT_PENDING, ctx)) {
+        delete ctx;
+        pre_remove_objects = true;
+      }
+    }
+  }
+
+  if (pre_remove_objects) {
+    send_pre_remove();
+  }
+}
+
 template <typename I>
 void TrimRequest<I>::send_post_remove() {
   I &image_ctx = this->m_image_ctx;
@@ -364,7 +444,7 @@ void TrimRequest<I>::send_clean_boundary() {
     AioObjectRequest<> *req;
     if (p->offset == 0) {
       req = new AioObjectTrim(&image_ctx, p->oid.name, p->objectno, snapc,
-                              req_comp);
+                              req_comp, true);
     } else {
       req = new AioObjectTruncate(&image_ctx, p->oid.name, p->objectno,
                                   p->offset, snapc, req_comp);
index 58312813dfc4a5b5d27da28a719699e23ae6520c..814d1802309787139528c2a454cecb5080dbab8b 100644 (file)
@@ -34,14 +34,17 @@ protected:
    * @verbatim
    *
    *     <start> . . . . > STATE_FINISHED . . . . . . . . .
-   *      |   .                                           .
-   *      |   . . . . . . . . . . . .                     .
-   *      |                         .                     .
-   *      v                         .                     .
-   * STATE_COPYUP_OBJECTS . . .     .                     .
-   *      |                   .     .                     .
-   *      |                   .     .                     .
-   *      v                   v     v                     .
+   *      |    . . . . . . . . . . > . . . . . . . . .    .
+   *      |   /                                      .    .
+   * STATE_PRE_COPYUP ---> STATE_COPYUP_OBJECTS      .    .
+   *                                |                .    .
+   *        /-----------------------/                v    .
+   *        |                                        .    .
+   *        v                                        .    .
+   * STATE_POST_COPYUP. . . > .                      .    .
+   *      |    . . . . . . . . . . < . . . . . . . . .    .
+   *      |    |              .                           .
+   *      v    v              v                           .
    * STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS           .
    *                                |   .   .             .
    *        /-----------------------/   .   . . . . . .   .
@@ -64,7 +67,9 @@ protected:
    */ 
 
   enum State {
+    STATE_PRE_COPYUP,
     STATE_COPYUP_OBJECTS,
+    STATE_POST_COPYUP,
     STATE_PRE_REMOVE,
     STATE_REMOVE_OBJECTS,
     STATE_POST_REMOVE,
@@ -83,14 +88,21 @@ private:
   uint64_t m_new_size;
   ProgressContext &m_prog_ctx;
 
+  uint64_t m_copyup_start;
+  uint64_t m_copyup_end;
+
   TrimRequest(ImageCtxT &image_ctx, Context *on_finish,
              uint64_t original_size, uint64_t new_size,
              ProgressContext &prog_ctx);
 
+  void send_pre_copyup();
   void send_copyup_objects();
-  void send_remove_objects();
+  void send_post_copyup();
+
   void send_pre_remove();
+  void send_remove_objects();
   void send_post_remove();
+
   void send_clean_boundary();
   void send_finish(int r);
 };