git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: use separate files for snapshot object maps
author Jason Dillaman <dillaman@redhat.com>
Sat, 7 Feb 2015 14:13:10 +0000 (09:13 -0500)
committer Josh Durgin <jdurgin@redhat.com>
Mon, 16 Feb 2015 22:27:25 +0000 (14:27 -0800)
Instead of relying on the built-in object snapshot support,
create a separate object map object for each image snapshot.
This will allow a future repair utility to rebuild the object
map for an image's snapshots.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/ImageCtx.cc
src/librbd/ImageWatcher.cc
src/librbd/ObjectMap.cc
src/librbd/ObjectMap.h
src/librbd/internal.cc
src/librbd/internal.h

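diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index 116cb92aa8a1f6426310f717b6af9590b19dc7d5..9fbc04ca489287e8e3c6c9e49df59bcbb0f8487d 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc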
@@ -299,7 +299,7 @@ namespace librbd {
       data_ctx.snap_set_read(snap_id);
 
       if (object_map != NULL) {
-        object_map->refresh();
+        object_map->refresh(in_snap_id);
       }
       return 0;
     }
@@ -314,7 +314,7 @@ namespace librbd {
     data_ctx.snap_set_read(snap_id);
 
     if (object_map != NULL) {
-      object_map->refresh();
+      object_map->refresh(CEPH_NOSNAP);
     }
   }
 
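diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc
index 60b4756affa80036e27eff216e967fd809d764a7..8c88434f6116bddd21eac1a0b47005c626d27799 100644
--- a/src/librbd/ImageWatcher.cc
+++ b/src/librbd/ImageWatcher.cc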
@@ -348,7 +348,7 @@ int ImageWatcher::lock() {
       unlock();
       return r;
     }
-    m_image_ctx.object_map->refresh();
+    m_image_ctx.object_map->refresh(CEPH_NOSNAP);
   }
 
   bufferlist bl;
@@ -396,9 +396,8 @@ void ImageWatcher::release_lock()
   RWLock::WLocker l(m_image_ctx.owner_lock);
   {
     RWLock::WLocker l2(m_image_ctx.md_lock);
-    m_image_ctx.flush_cache();
+    librbd::_flush(&m_image_ctx);
   }
-  m_image_ctx.data_ctx.aio_flush();
 
   unlock();
 }
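diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc
index a3eec40a9959d0c132752f8463259b9923863091..3bdf232a43e282c05ae4a9cfbf19e4ee9238a444 100644
--- a/src/librbd/ObjectMap.cc
+++ b/src/librbd/ObjectMap.cc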
@@ -8,6 +8,7 @@
 #include "common/errno.h"
 #include "include/stringify.h"
 #include "cls/lock/cls_lock_client.h"
+#include <sstream>
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -20,6 +21,18 @@ ObjectMap::ObjectMap(ImageCtx &image_ctx)
 {
 }
 
+std::string ObjectMap::object_map_name(const std::string &image_id,
+                                      uint64_t snap_id) {
+  std::string oid(RBD_OBJECT_MAP_PREFIX + image_id);
+  if (snap_id != CEPH_NOSNAP) {
+    std::stringstream snap_suffix;
+    snap_suffix << "." << std::setfill('0') << std::setw(16) << std::hex
+               << snap_id;
+    oid += snap_suffix.str();
+  }
+  return oid;
+}
+
 int ObjectMap::lock()
 {
   if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
@@ -29,10 +42,10 @@ int ObjectMap::lock()
   int r;
   bool broke_lock = false;
   CephContext *cct = m_image_ctx.cct;
+  std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
   while (true) {
     ldout(cct, 10) << &m_image_ctx << " locking object map" << dendl;
-    r = rados::cls::lock::lock(&m_image_ctx.md_ctx,
-                              object_map_name(m_image_ctx.id),
+    r = rados::cls::lock::lock(&m_image_ctx.md_ctx, oid,
                               RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "", "",
                               utime_t(), 0);
     if (r == 0) {
@@ -47,8 +60,7 @@ int ObjectMap::lock()
     lockers_t lockers;
     ClsLockType lock_type;
     std::string lock_tag;
-    int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx,
-                                           object_map_name(m_image_ctx.id),
+    int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx, oid,
                                             RBD_LOCK_NAME, &lockers,
                                             &lock_type, &lock_tag);
     if (r == -ENOENT) {
@@ -63,8 +75,7 @@ int ObjectMap::lock()
     for (lockers_t::iterator it = lockers.begin();
          it != lockers.end(); ++it) {
       const rados::cls::lock::locker_id_t &locker = it->first;
-      r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx,
-                                      object_map_name(m_image_ctx.id),
+      r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx, oid,
                                        RBD_LOCK_NAME, locker.cookie,
                                        locker.locker);
       if (r < 0 && r != -ENOENT) {
@@ -89,8 +100,8 @@ int ObjectMap::unlock()
 
   ldout(m_image_ctx.cct, 10) << &m_image_ctx << " unlocking object map"
                             << dendl;
-  int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx,
-                                  object_map_name(m_image_ctx.id),
+  std::string oid = object_map_name(m_image_ctx.id, CEPH_NOSNAP);
+  int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx, oid,
                                    RBD_LOCK_NAME, "");
   if (r < 0 && r != -ENOENT) {
     lderr(m_image_ctx.cct) << "failed to release object map lock: "
@@ -108,52 +119,112 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const
   }
 
   RWLock::RLocker l(m_image_ctx.object_map_lock);
-  assert(object_no < object_map.size());
+  assert(object_no < m_object_map.size());
 
-  bool exists = (object_map[object_no] == OBJECT_EXISTS ||
-                object_map[object_no] == OBJECT_PENDING);
+  bool exists = (m_object_map[object_no] == OBJECT_EXISTS ||
+                m_object_map[object_no] == OBJECT_PENDING);
   ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: "
                             << "object_no=" << object_no << " r=" << exists
                             << dendl;
   return exists;
 }
 
-int ObjectMap::refresh()
+void ObjectMap::refresh(uint64_t snap_id)
 { 
   if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
-    return 0;
+    return;
   }
 
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << &m_image_ctx << " refreshing object map" << dendl;
 
   RWLock::WLocker l(m_image_ctx.object_map_lock);
-  int r = cls_client::object_map_load(&m_image_ctx.data_ctx,
-                                     object_map_name(m_image_ctx.id),
-                                      &object_map);
+  std::string oid(object_map_name(m_image_ctx.id, snap_id));
+  int r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid,
+                                      &m_object_map);
   if (r < 0) { 
     lderr(cct) << "error refreshing object map: " << cpp_strerror(r)
                << dendl;
     invalidate();
-    object_map.clear();
-    return r;
+    m_object_map.clear();
+    return;
   }
   
-  ldout(cct, 20) << "refreshed object map: " << object_map.size()
+  ldout(cct, 20) << "refreshed object map: " << m_object_map.size()
                  << dendl;
   
   uint64_t num_objs = Striper::get_num_objects(
-    m_image_ctx.layout, m_image_ctx.get_image_size(m_image_ctx.snap_id));
-  if (object_map.size() != num_objs) {
+    m_image_ctx.layout, m_image_ctx.get_image_size(snap_id));
+  if (m_object_map.size() != num_objs) {
     // resize op might have been interrupted
-    lderr(cct) << "incorrect object map size: " << object_map.size()
+    lderr(cct) << "incorrect object map size: " << m_object_map.size()
                << " != " << num_objs << dendl;
     invalidate();
-    return -EINVAL;
   }
-  return 0;
 }
 
+void ObjectMap::rollback(uint64_t snap_id) {
+  if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << &m_image_ctx << " rollback object map" << dendl;
+
+  RWLock::WLocker l(m_image_ctx.object_map_lock);
+
+  std::string snap_oid(object_map_name(m_image_ctx.id, snap_id));
+  bufferlist bl;
+  int r = m_image_ctx.md_ctx.read(snap_oid, bl, 0, 0);
+  if (r < 0) {
+    lderr(cct) << "unable to load snapshot object map '" << snap_oid << "': "
+              << cpp_strerror(r) << dendl;
+    invalidate();
+    return;
+  }
+
+  librados::ObjectWriteOperation op;
+  rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "");
+  op.write_full(bl);
+
+  std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+  r = m_image_ctx.md_ctx.operate(oid, &op);
+  if (r < 0) {
+    lderr(cct) << "unable to rollback object map: " << cpp_strerror(r)
+              << dendl;
+    invalidate();
+  }
+}
+
+void ObjectMap::snapshot(uint64_t snap_id) {
+  if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) {
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << &m_image_ctx << " snapshot object map" << dendl;
+
+  int r;
+  bufferlist bl;
+  {
+    RWLock::RLocker l(m_image_ctx.object_map_lock);
+    std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+    r = m_image_ctx.md_ctx.read(oid, bl, 0, 0);
+    if (r < 0) {
+      lderr(cct) << "unable to load object map: " << cpp_strerror(r)
+                << dendl;
+      invalidate();
+    }
+  }
+
+  std::string snap_oid(object_map_name(m_image_ctx.id, snap_id));
+  r = m_image_ctx.md_ctx.write_full(snap_oid, bl);
+  if (r < 0) {
+    lderr(cct) << "unable to snapshot object map '" << snap_oid << "': "
+              << cpp_strerror(r) << dendl;
+    invalidate();
+  }
+}
 
 void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state,
                           Context *on_finish) {
@@ -190,15 +261,15 @@ bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no,
   ldout(cct, 20) << &m_image_ctx << " aio_update: start=" << start_object_no
                 << ", end=" << end_object_no << ", new_state="
                 << static_cast<uint32_t>(new_state) << dendl;
-  if (end_object_no > object_map.size()) {
+  if (end_object_no > m_object_map.size()) {
     ldout(cct, 20) << "skipping update of invalid object map" << dendl;
     return false;
   }
   
   for (uint64_t object_no = start_object_no; object_no < end_object_no;
        ++object_no) {
-    if ((!current_state || object_map[object_no] == *current_state) &&
-        object_map[object_no] != new_state) {
+    if ((!current_state || m_object_map[object_no] == *current_state) &&
+        m_object_map[object_no] != new_state) {
       UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no,
                                             end_object_no, new_state,
                                             current_state, on_finish);
@@ -307,8 +378,8 @@ void ObjectMap::ResizeRequest::send() {
   cls_client::object_map_resize(&op, m_num_objs, m_default_object_state);
 
   librados::AioCompletion *rados_completion = create_callback_completion();
-  int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
-                                          rados_completion, &op);
+  std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+  int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
   assert(r == 0);
   rados_completion->release();
 }
@@ -318,10 +389,11 @@ void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) {
 
   ldout(cct, 5) << &m_image_ctx << " resizing in-memory object map: "
                << m_num_objs << dendl;
-  size_t orig_object_map_size = object_map->object_map.size();
-  object_map->object_map.resize(m_num_objs);
-  for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) {
-    object_map->object_map[i] = m_default_object_state;
+  size_t orig_object_map_size = object_map->m_object_map.size();
+  object_map->m_object_map.resize(m_num_objs);
+  for (uint64_t i = orig_object_map_size;
+       i < object_map->m_object_map.size(); ++i) {
+    object_map->m_object_map[i] = m_default_object_state;
   }
 }
 
@@ -341,8 +413,8 @@ void ObjectMap::UpdateRequest::send() {
                                m_new_state, m_current_state);
 
   librados::AioCompletion *rados_completion = create_callback_completion();
-  int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id),
-                                           rados_completion, &op);
+  std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP));
+  int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op);
   assert(r == 0);
   rados_completion->release();
 }
@@ -352,11 +424,11 @@ void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) {
 
   ldout(cct, 20) << &m_image_ctx << " updating in-memory object map" << dendl;
   for (uint64_t object_no = m_start_object_no;
-       object_no < MIN(m_end_object_no, object_map->object_map.size());
+       object_no < MIN(m_end_object_no, object_map->m_object_map.size());
        ++object_no) {
     if (!m_current_state ||
-       object_map->object_map[object_no] == *m_current_state) {
-      object_map->object_map[object_no] = m_new_state;
+       object_map->m_object_map[object_no] == *m_current_state) {
+      object_map->m_object_map[object_no] = m_new_state;
     }
   }
 }
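diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h
index 0c476448d3c339a0ab61f11a7a5586c38986b2a0..2af1db2f057fc85a916cc4244b83385b9abd219e 100644
--- a/src/librbd/ObjectMap.h
+++ b/src/librbd/ObjectMap.h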
@@ -24,6 +24,9 @@ public:
 
   ObjectMap(ImageCtx &image_ctx);
 
+  static std::string object_map_name(const std::string &image_id,
+                                    uint64_t snap_id);
+
   int lock();
   int unlock();
 
@@ -39,7 +42,9 @@ public:
                  const boost::optional<uint8_t> &current_state,
                  Context *on_finish);
 
-  int refresh();
+  void refresh(uint64_t snap_id);
+  void rollback(uint64_t snap_id);
+  void snapshot(uint64_t snap_id);
 
 private:
 
@@ -113,7 +118,7 @@ private:
 
   ImageCtx &m_image_ctx;
 
-  ceph::BitVector<2> object_map;
+  ceph::BitVector<2> m_object_map;
 
   void invalidate();
 
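diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index a34d310828f1782200228fae25da09a4f5b43713..7d6593bb01c1d73a1ab889d2169c42b9a861a985 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc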
@@ -69,11 +69,6 @@ namespace librbd {
     return image_name + RBD_SUFFIX;
   }
 
-  const string object_map_name(const string &image_id)
-  {
-    return RBD_OBJECT_MAP_PREFIX + image_id;
-  }
-
   int detect_format(IoCtx &io_ctx, const string &name,
                    bool *old_format, uint64_t *size)
   {
@@ -315,7 +310,6 @@ namespace librbd {
       rollback_object(ictx, snap_id, ictx->get_object_name(i), throttle);
       prog_ctx.update_progress(i * bsize, numseg * bsize);
     }
-    rollback_object(ictx, snap_id, object_map_name(ictx->id), throttle);
 
     r = throttle.wait_for_ret();
     if (r < 0) {
@@ -323,6 +317,13 @@ namespace librbd {
                     << cpp_strerror(r) << dendl;
       return r;
     }
+
+    {
+      RWLock::RLocker l(ictx->md_lock);
+      if (ictx->object_map != NULL) {
+       ictx->object_map->rollback(snap_id);
+      }
+    }
     return 0;
   }
 
@@ -478,11 +479,13 @@ namespace librbd {
     if (r < 0)
       return r;
 
+    bool lock_owner = false;
     while (ictx->image_watcher->is_lock_supported()) {
       r = prepare_image_update(ictx);
       if (r < 0) {
        return -EROFS;
       } else if (ictx->image_watcher->is_lock_owner()) {
+       lock_owner = true;
        break;
       }
 
@@ -493,13 +496,19 @@ namespace librbd {
       ldout(ictx->cct, 5) << "snap_create timed out notifying lock owner" << dendl;
     }
 
-    RWLock::RLocker l2(ictx->md_lock);
+    RWLock::WLocker l2(ictx->md_lock);
+    r = _flush(ictx);
+    if (r < 0) {
+      return r;
+    }
+
     do {
-      r = add_snap(ictx, snap_name);
+      r = add_snap(ictx, snap_name, lock_owner);
     } while (r == -ESTALE);
 
-    if (r < 0)
+    if (r < 0) {
       return r;
+    }
 
     if (notify) {
       notify_change(ictx->md_ctx, ictx->header_oid, ictx);
@@ -566,12 +575,20 @@ namespace librbd {
       }
     }
 
+    if (ictx->object_map != NULL) {
+      r = ictx->md_ctx.remove(ObjectMap::object_map_name(ictx->id, snap_id));
+      if (r < 0 && r != -ENOENT) {
+       lderr(ictx->cct) << "snap_remove: failed to remove snapshot object map"
+                        << dendl;
+       return 0;
+      }
+    }
+
     r = rm_snap(ictx, snap_name);
     if (r < 0)
       return r;
 
     r = ictx->data_ctx.selfmanaged_snap_remove(snap_id);
-
     if (r < 0)
       return r;
 
@@ -886,7 +903,7 @@ reprotect_and_return_err:
       librados::ObjectWriteOperation op;
       cls_client::object_map_resize(&op, Striper::get_num_objects(layout, size),
                                     OBJECT_NONEXISTENT);
-      r = io_ctx.operate(object_map_name(id), &op);
+      r = io_ctx.operate(ObjectMap::object_map_name(id, CEPH_NOSNAP), &op);
       if (r < 0) {
         goto err_remove_header;
       }
@@ -1564,7 +1581,7 @@ reprotect_and_return_err:
       }
     }
     if (!old_format) {
-      r = io_ctx.remove(object_map_name(id));
+      r = io_ctx.remove(ObjectMap::object_map_name(id, CEPH_NOSNAP));
       if (r < 0 && r != -ENOENT) {
        lderr(cct) << "error removing image object map" << dendl;
       }
@@ -1726,9 +1743,10 @@ reprotect_and_return_err:
   }
 
 
-  int add_snap(ImageCtx *ictx, const char *snap_name)
+  int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner)
   {
     assert(ictx->owner_lock.is_locked());
+    assert(ictx->md_lock.is_wlocked());
     uint64_t snap_id;
 
     int r = ictx->md_ctx.selfmanaged_snap_create(&snap_id);
@@ -1757,6 +1775,24 @@ reprotect_and_return_err:
       return r;
     }
 
+    if (!ictx->old_format) {
+      if (ictx->object_map != NULL) {
+       ictx->object_map->snapshot(snap_id);
+      }
+      if (lock_owner) {
+       // immediately start using the new snap context if we
+       // own the exclusive lock
+       std::vector<snapid_t> snaps;
+       snaps.push_back(snap_id);
+       snaps.insert(snaps.end(), ictx->snapc.snaps.begin(),
+                    ictx->snapc.snaps.end());
+
+       ictx->snapc.seq = snap_id;
+       ictx->snapc.snaps.swap(snaps);
+       ictx->data_ctx.selfmanaged_snap_set_write_ctx(ictx->snapc.seq,
+                                                     ictx->snaps);
+      }
+    }
     return 0;
   }
 
@@ -2016,7 +2052,7 @@ reprotect_and_return_err:
       } else {
        ictx->object_map = new ObjectMap(*ictx);
        if (ictx->snap_exists) {
-         ictx->object_map->refresh();
+         ictx->object_map->refresh(ictx->snap_id);
        }
       }
 
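diff --git a/src/librbd/internal.h b/src/librbd/internal.h
index 1e6f790b2da6dd01544ac59ae3032a746a8fa86c..d164f02ff439a7100f7102947084d5f063120f9a 100644
--- a/src/librbd/internal.h
+++ b/src/librbd/internal.h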
@@ -76,7 +76,6 @@ namespace librbd {
   const std::string id_obj_name(const std::string &name);
   const std::string header_name(const std::string &image_id);
   const std::string old_header_name(const std::string &image_name);
-  const std::string object_map_name(const std::string &image_id);
 
   int detect_format(librados::IoCtx &io_ctx, const std::string &name,
                    bool *old_format, uint64_t *size);
@@ -120,7 +119,7 @@ namespace librbd {
   int snap_unprotect(ImageCtx *ictx, const char *snap_name);
   int snap_is_protected(ImageCtx *ictx, const char *snap_name,
                        bool *is_protected);
-  int add_snap(ImageCtx *ictx, const char *snap_name);
+  int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner);
   int rm_snap(ImageCtx *ictx, const char *snap_name);
   int refresh_parent(ImageCtx *ictx);
   int ictx_check(ImageCtx *ictx);