]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: fixed object map issues discovered via fsx
authorJason Dillaman <dillaman@redhat.com>
Mon, 2 Feb 2015 20:08:42 +0000 (15:08 -0500)
committerJosh Durgin <jdurgin@redhat.com>
Tue, 3 Feb 2015 12:15:00 +0000 (13:15 +0100)
The object map wasn't being properly refreshed after setting
the snapshot context on the parent image. Additionally fixed
a potential deadlock that could have occurred if no object
map update was required when trimming an image.

Fixes: #10706
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
Reviewed-by: Josh Durgin <jdurgin@redhat.com>
src/librbd/AioRequest.cc
src/librbd/AsyncResizeRequest.cc
src/librbd/AsyncTrimRequest.cc
src/librbd/CopyupRequest.cc
src/librbd/ImageCtx.cc
src/librbd/ObjectMap.cc
src/librbd/ObjectMap.h
src/librbd/internal.cc
src/test/pybind/test_rbd.py

index c4948f81ee461aae06bd72bdec2037d97799c191..35ba2e6bd896878020ac5ebc65827449be7a1f74 100644 (file)
@@ -438,8 +438,11 @@ namespace librbd {
         m_state = LIBRBD_AIO_WRITE_PRE; 
         FunctionContext *ctx = new FunctionContext(
           boost::bind(&AioRequest::complete, this, _1));
-        m_ictx->object_map->aio_update(m_object_no, new_state,
-                                      current_state, ctx);
+        if (!m_ictx->object_map->aio_update(m_object_no, new_state,
+                                           current_state, ctx)) {
+         // no object map update required
+         return false;
+       }
       }
     }
     
@@ -469,8 +472,11 @@ namespace librbd {
     m_state = LIBRBD_AIO_WRITE_POST;
     FunctionContext *ctx = new FunctionContext(
       boost::bind(&AioRequest::complete, this, _1));
-    m_ictx->object_map->aio_update(m_object_no, OBJECT_NONEXISTENT,
-                                   OBJECT_PENDING, ctx);
+    if (!m_ictx->object_map->aio_update(m_object_no, OBJECT_NONEXISTENT,
+                                       OBJECT_PENDING, ctx)) {
+      // no object map update required
+      return true;
+    }
     return false;  
   } 
 
index d0e330775abd59bd5dcb1ff57ce438c6143f7f72..af2a3cdf8dccf27ab3fb3e8faed38ee453922a86 100644 (file)
@@ -33,7 +33,7 @@ bool AsyncResizeRequest::should_complete(int r)
   switch (m_state) {
   case STATE_TRIM_IMAGE:
     ldout(cct, 5) << "TRIM_IMAGE" << dendl;
-    send_grow_object_map();
+    send_update_header();
     break;
 
   case STATE_GROW_OBJECT_MAP:
@@ -127,10 +127,10 @@ void AsyncResizeRequest::send_grow_object_map() {
     }
   }
 
+  // avoid possible recursive lock attempts
   if (!object_map_enabled) {
     send_update_header();
   } else if (lost_exclusive_lock) {
-    // only complete when not holding locks
     complete(-ERESTART);
   }
 }
@@ -160,8 +160,8 @@ bool AsyncResizeRequest::send_shrink_object_map() {
     }
   }
 
+  // avoid possible recursive lock attempts
   if (lost_exclusive_lock) {
-    // only complete when not holding locks
     complete(-ERESTART);
   }
   return false;
@@ -207,8 +207,8 @@ void AsyncResizeRequest::send_update_header() {
     }
   }
 
+  // avoid possible recursive lock attempts
   if (lost_exclusive_lock) {
-    // only complete when not holding locks
     complete(-ERESTART);
   }
 }
index 610d2d011a1e9e24ce34e30c61eb0e1d91804494..3bbab55252582bfdf9228a387ac5e28d57ffe392 100644 (file)
@@ -159,61 +159,75 @@ void AsyncTrimRequest::send_remove_objects() {
 }
 
 void AsyncTrimRequest::send_pre_remove() {
+  bool remove_objects = false;
   bool lost_exclusive_lock = false;
   {
     RWLock::RLocker l(m_image_ctx.owner_lock);
     RWLock::RLocker l2(m_image_ctx.md_lock);
     if (m_image_ctx.object_map == NULL) {
-      send_remove_objects();
-      return;
-    }
-
-    ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
-                             << " delete_start=" << m_delete_start
-                             << " num_objects=" << m_num_objects << dendl;
-    m_state = STATE_PRE_REMOVE;
-
-    if (!m_image_ctx.image_watcher->is_lock_owner()) {
-      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
-      lost_exclusive_lock = true;
+      remove_objects = true;
     } else {
-      // flag the objects as pending deletion
-      m_image_ctx.object_map->aio_update(
-       m_delete_start, m_num_objects, OBJECT_PENDING, OBJECT_EXISTS,
-       create_callback_context());
+      ldout(m_image_ctx.cct, 5) << this << " send_pre_remove: "
+                               << " delete_start=" << m_delete_start
+                               << " num_objects=" << m_num_objects << dendl;
+      m_state = STATE_PRE_REMOVE;
+
+      if (!m_image_ctx.image_watcher->is_lock_owner()) {
+        ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+        lost_exclusive_lock = true;
+      } else {
+        // flag the objects as pending deletion
+        if (!m_image_ctx.object_map->aio_update(m_delete_start, m_num_objects,
+                                               OBJECT_PENDING, OBJECT_EXISTS,
+                                               create_callback_context())) {
+          remove_objects = true;
+        }
+      }
     }
   }
 
-  if (lost_exclusive_lock) {
+  // avoid possible recursive lock attempts
+  if (remove_objects) {
+    // no object map update required
+    send_remove_objects();
+  } else if (lost_exclusive_lock) {
     complete(-ERESTART);
   }
 }
 
 bool AsyncTrimRequest::send_post_remove() {
+  bool clean_boundary = false;
   bool lost_exclusive_lock = false;
   {
     RWLock::RLocker l(m_image_ctx.owner_lock);
     RWLock::RLocker l2(m_image_ctx.md_lock);
     if (m_image_ctx.object_map == NULL) {
-      return send_clean_boundary();
-    }
-
-    ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
-                             << " delete_start=" << m_delete_start
-                             << " num_objects=" << m_num_objects << dendl;
-    m_state = STATE_POST_REMOVE;
-
-    if (!m_image_ctx.image_watcher->is_lock_owner()) {
-      ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+      clean_boundary = true;
     } else {
-      // flag the pending objects as removed
-      m_image_ctx.object_map->aio_update(
-       m_delete_start, m_num_objects, OBJECT_NONEXISTENT, OBJECT_PENDING,
-       create_callback_context());
+      ldout(m_image_ctx.cct, 5) << this << " send_post_remove: "
+                               << " delete_start=" << m_delete_start
+                               << " num_objects=" << m_num_objects << dendl;
+      m_state = STATE_POST_REMOVE;
+
+      if (!m_image_ctx.image_watcher->is_lock_owner()) {
+        ldout(m_image_ctx.cct, 1) << "lost exclusive lock during trim" << dendl;
+      } else {
+        // flag the pending objects as removed
+        if (!m_image_ctx.object_map->aio_update(m_delete_start, m_num_objects,
+                                               OBJECT_NONEXISTENT,
+                                               OBJECT_PENDING,
+                                               create_callback_context())) {
+         clean_boundary = true;
+       }
+      }
     }
   }
 
-  if (lost_exclusive_lock) {
+  // avoid possible recursive lock attempts
+  if (clean_boundary) {
+    // no object map update required
+    return send_clean_boundary();
+  } else if (lost_exclusive_lock) {
     complete(-ERESTART);
   }
   return false;
@@ -287,6 +301,7 @@ bool AsyncTrimRequest::send_clean_boundary() {
 
   }
 
+  // avoid possible recursive lock attempts
   if (lost_exclusive_lock) {
     complete(-ERESTART);
   } else if (completion != NULL) {
index 0a6df6a60c3d7cf659a55e4c69db99acd16f087d..fa4fe934ba8b66d2eeafa7f5bf74f67c6ad27628 100644 (file)
@@ -171,28 +171,32 @@ namespace librbd {
   }
 
   bool CopyupRequest::send_object_map() {
-    bool object_map_enabled = true;
+    bool copyup = false;
     {
       RWLock::RLocker l(m_ictx->owner_lock);
       RWLock::RLocker l2(m_ictx->md_lock);
       if (m_ictx->object_map == NULL) {
-       object_map_enabled = false;
+       copyup = true;
       } else if (!m_ictx->image_watcher->is_lock_owner()) {
        ldout(m_ictx->cct, 20) << "exclusive lock not held for copy-on-read"
                               << dendl;
        return true; 
       } else {
        m_state = STATE_OBJECT_MAP;
-        m_ictx->object_map->aio_update(m_object_no, OBJECT_EXISTS,
-                                      boost::optional<uint8_t>(),
-                                      create_callback_context());
+        if (!m_ictx->object_map->aio_update(m_object_no, OBJECT_EXISTS,
+                                           boost::optional<uint8_t>(),
+                                           create_callback_context())) {
+         copyup = true;
+       }
       }
     }
 
-    if (!object_map_enabled) {
+    // avoid possible recursive lock attempts
+    if (copyup) {
+      // no object map update required
       send_copyup();
       return true;
-    }        
+    }
     return false;
   }
 
index f8649e28e611a25685631ecd579d41cb70dcdaff..398b49b69dbbb5e35ee365a51a88f0f37483c0c0 100644 (file)
@@ -298,6 +298,10 @@ namespace librbd {
       snap_name = in_snap_name;
       snap_exists = true;
       data_ctx.snap_set_read(snap_id);
+
+      if (object_map != NULL) {
+        object_map->refresh();
+      }
       return 0;
     }
     return -ENOENT;
@@ -309,6 +313,10 @@ namespace librbd {
     snap_name = "";
     snap_exists = true;
     data_ctx.snap_set_read(snap_id);
+
+    if (object_map != NULL) {
+      object_map->refresh();
+    }
   }
 
   snap_t ImageCtx::get_snap_id(string in_snap_name) const
index e4c736e2b93950889c98909ab1ea5ec27138da7c..f179fa83302900f15edbdd2f6c52b868e7875665 100644 (file)
@@ -6,6 +6,7 @@
 #include "librbd/internal.h"
 #include "common/dout.h"
 #include "common/errno.h"
+#include "include/stringify.h"
 #include "cls/lock/cls_lock_client.h"
 
 #define dout_subsys ceph_subsys_rbd
@@ -29,7 +30,7 @@ int ObjectMap::lock()
   bool broke_lock = false;
   CephContext *cct = m_image_ctx.cct;
   while (true) {
-    ldout(cct, 10) << "locking object map" << dendl;
+    ldout(cct, 10) << &m_image_ctx << " locking object map" << dendl;
     r = rados::cls::lock::lock(&m_image_ctx.md_ctx,
                               object_map_name(m_image_ctx.id),
                               RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "", "",
@@ -86,6 +87,8 @@ int ObjectMap::unlock()
     return 0;
   }
 
+  ldout(m_image_ctx.cct, 10) << &m_image_ctx << " unlocking object map"
+                            << dendl;
   int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx,
                                   object_map_name(m_image_ctx.id),
                                    RBD_LOCK_NAME, "");
@@ -106,8 +109,13 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const
 
   RWLock::RLocker l(m_image_ctx.object_map_lock);
   assert(object_no < object_map.size());
-  return (object_map[object_no] == OBJECT_EXISTS ||
-          object_map[object_no] == OBJECT_PENDING);
+
+  bool exists = (object_map[object_no] == OBJECT_EXISTS ||
+                object_map[object_no] == OBJECT_PENDING);
+  ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: "
+                            << "object_no=" << object_no << " r=" << exists
+                            << dendl;
+  return exists;
 }
 
 int ObjectMap::refresh()
@@ -115,9 +123,11 @@ int ObjectMap::refresh()
   if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
     return 0;
   }
-  
-  RWLock::WLocker l(m_image_ctx.object_map_lock);
+
   CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << &m_image_ctx << " refreshing object map" << dendl;
+
+  RWLock::WLocker l(m_image_ctx.object_map_lock);
   int r = cls_client::object_map_load(&m_image_ctx.data_ctx,
                                      object_map_name(m_image_ctx.id),
                                       &object_map);
@@ -156,14 +166,15 @@ void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state,
   req->send();
 }
 
-void ObjectMap::aio_update(uint64_t object_no, uint8_t new_state,
+bool ObjectMap::aio_update(uint64_t object_no, uint8_t new_state,
                           const boost::optional<uint8_t> &current_state,
                           Context *on_finish)
 {
-  aio_update(object_no, object_no + 1, new_state, current_state, on_finish);
+  return aio_update(object_no, object_no + 1, new_state, current_state,
+                   on_finish);
 }
 
-void ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
+bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
                           uint8_t new_state,
                            const boost::optional<uint8_t> &current_state,
                            Context *on_finish)
@@ -172,42 +183,32 @@ void ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.image_watcher->is_lock_owner());
 
-  bool update_required = false;
-  {
-    RWLock::WLocker l(m_image_ctx.object_map_lock);
-    assert(start_object_no < end_object_no);
+  RWLock::WLocker l(m_image_ctx.object_map_lock);
+  assert(start_object_no < end_object_no);
   
-    CephContext *cct = m_image_ctx.cct;
-    if (end_object_no > object_map.size()) {
-      ldout(cct, 20) << "skipping update of invalid object map" << dendl;
-      return;
-    }
+  CephContext *cct = m_image_ctx.cct;
+  if (end_object_no > object_map.size()) {
+    ldout(cct, 20) << "skipping update of invalid object map" << dendl;
+    return false;
+  }
   
-    for (uint64_t object_no = start_object_no; object_no < end_object_no;
-         ++object_no) {
-      if ((!current_state || object_map[object_no] == *current_state) &&
-          object_map[object_no] != new_state) {
-        update_required = true;
-        break;
-      }
-    }
-
-    if (update_required) {
+  for (uint64_t object_no = start_object_no; object_no < end_object_no;
+       ++object_no) {
+    if ((!current_state || object_map[object_no] == *current_state) &&
+        object_map[object_no] != new_state) {
       UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no,
                                             end_object_no, new_state,
                                             current_state, on_finish);
       req->send();
+      return true;
     }
   }
-
-  if (!update_required) {
-    on_finish->complete(0);
-  }
+  return false;
 }
 
 void ObjectMap::invalidate() {
   CephContext *cct = m_image_ctx.cct;
-  lderr(cct) << this << " invalidating object map" << dendl;
+  lderr(cct) << &m_image_ctx << " invalidating object map" << dendl;
   m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID;
 
   librados::ObjectWriteOperation op;
@@ -222,7 +223,7 @@ void ObjectMap::invalidate() {
 
 bool ObjectMap::Request::should_complete(int r) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " should_complete: r=" << r << dendl;
+  ldout(cct, 20) << &m_image_ctx << " should_complete: r=" << r << dendl;
 
   switch (m_state)
   {
@@ -252,9 +253,8 @@ bool ObjectMap::Request::should_complete(int r) {
     if (r < 0) {
       lderr(cct) << "failed to invalidate object map: " << cpp_strerror(r)
                 << dendl; 
-      return true;
     }
-    break;
+    return true;
 
   default:
     lderr(cct) << "invalid state: " << m_state << dendl;
@@ -268,7 +268,7 @@ void ObjectMap::Request::invalidate() {
   CephContext *cct = m_image_ctx.cct;
   RWLock::WLocker l(m_image_ctx.md_lock);
 
-  lderr(cct) << this << " invalidating object map" << dendl;
+  lderr(cct) << &m_image_ctx << " invalidating object map" << dendl;
   m_state = STATE_INVALIDATE;
   m_image_ctx.flags |= RBD_FLAG_OBJECT_MAP_INVALID;
 
@@ -288,7 +288,8 @@ void ObjectMap::ResizeRequest::send() {
   RWLock::WLocker l(m_image_ctx.object_map_lock);
   m_num_objs = Striper::get_num_objects(m_image_ctx.layout, m_new_size);
 
-  ldout(cct, 5) << this << " resizing on-disk object map: " << m_num_objs << dendl;
+  ldout(cct, 5) << &m_image_ctx << " resizing on-disk object map: "
+               << m_num_objs << dendl;
 
   librados::ObjectWriteOperation op;
   rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
@@ -304,7 +305,8 @@ void ObjectMap::ResizeRequest::send() {
 void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) {
   CephContext *cct = m_image_ctx.cct;
 
-  ldout(cct, 5) << this << " resizing in-memory object map: " << m_num_objs << dendl;
+  ldout(cct, 5) << &m_image_ctx << " resizing in-memory object map: "
+               << m_num_objs << dendl;
   size_t orig_object_map_size = object_map->object_map.size();
   object_map->object_map.resize(m_num_objs);
   for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) {
@@ -315,9 +317,11 @@ void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) {
 void ObjectMap::UpdateRequest::send() {
   CephContext *cct = m_image_ctx.cct;
 
-  ldout(cct, 20) << this << " updating on-disk object map: ["
+  ldout(cct, 20) << &m_image_ctx << " updating on-disk object map: ["
                 << m_start_object_no << "," << m_end_object_no << ") = "
-                << static_cast<uint32_t>(m_new_state) << dendl;
+                << (m_current_state ? stringify(*m_current_state) : "")
+                << "->" << static_cast<uint32_t>(m_new_state)
+                << dendl;
   
   librados::ObjectWriteOperation op;
   rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
@@ -334,7 +338,7 @@ void ObjectMap::UpdateRequest::send() {
 void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) {
   CephContext *cct = m_image_ctx.cct;
 
-  ldout(cct, 20) << this << " updating in-memory object map" << dendl;
+  ldout(cct, 20) << &m_image_ctx << " updating in-memory object map" << dendl;
   for (uint64_t object_no = m_start_object_no;
        object_no < MIN(m_end_object_no, object_map->object_map.size());
        ++object_no) {
index 648043d38f96eaae8f6a2b1ffefecc5da9286feb..29eac9730af9bc497861b5b8e064dca441917fb2 100644 (file)
@@ -31,10 +31,10 @@ public:
 
   void aio_resize(uint64_t new_size, uint8_t default_object_state,
                  Context *on_finish);
-  void aio_update(uint64_t object_no, uint8_t new_state,
-                const boost::optional<uint8_t> &current_state,
-                Context *on_finish);
-  void aio_update(uint64_t start_object_no, uint64_t end_object_no,
+  bool aio_update(uint64_t object_no, uint8_t new_state,
+                 const boost::optional<uint8_t> &current_state,
+                 Context *on_finish);
+  bool aio_update(uint64_t start_object_no, uint64_t end_object_no,
                  uint8_t new_state,
                  const boost::optional<uint8_t> &current_state,
                  Context *on_finish);
index f75c14cbb94ca345c340bd7d3fad2393c270a902..285f0551eebe7fd60fd9968b16178ea4303a4626 100644 (file)
@@ -2281,10 +2281,6 @@ reprotect_and_return_err:
     if (r < 0) {
       return r;
     }
-
-    if (ictx->object_map != NULL) {
-      ictx->object_map->refresh();
-    }
     refresh_parent(ictx);
     return 0;
   }
index 4d3cf9d4cfbbcdb53195ced125114ee97aeb9bf3..985bcf0c408bfbc9a8282f312406eef31e6994d6 100644 (file)
@@ -741,6 +741,7 @@ class TestClone(object):
 
     def test_resize_io(self):
         parent_data = self.image.read(IMG_SIZE / 2, 256)
+        self.image.resize(0)
         self.clone.resize(IMG_SIZE / 2 + 128)
         child_data = self.clone.read(IMG_SIZE / 2, 128)
         eq(child_data, parent_data[:128])