From: Jason Dillaman Date: Sat, 7 Feb 2015 14:13:10 +0000 (-0500) Subject: librbd: use separate files for snapshot object maps X-Git-Tag: v0.93~38^2^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0de6b6179abe5ec57e5336bba4638585f26dd22e;p=ceph.git librbd: use separate files for snapshot object maps Instead of relying on the built-in object snapshot support, create a separate object map object for each image snapshot. This will allow a future repair utility to rebuild the object map for an image's snapshots. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 116cb92aa8a1..9fbc04ca4892 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -299,7 +299,7 @@ namespace librbd { data_ctx.snap_set_read(snap_id); if (object_map != NULL) { - object_map->refresh(); + object_map->refresh(in_snap_id); } return 0; } @@ -314,7 +314,7 @@ namespace librbd { data_ctx.snap_set_read(snap_id); if (object_map != NULL) { - object_map->refresh(); + object_map->refresh(CEPH_NOSNAP); } } diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 60b4756affa8..8c88434f6116 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -348,7 +348,7 @@ int ImageWatcher::lock() { unlock(); return r; } - m_image_ctx.object_map->refresh(); + m_image_ctx.object_map->refresh(CEPH_NOSNAP); } bufferlist bl; @@ -396,9 +396,8 @@ void ImageWatcher::release_lock() RWLock::WLocker l(m_image_ctx.owner_lock); { RWLock::WLocker l2(m_image_ctx.md_lock); - m_image_ctx.flush_cache(); + librbd::_flush(&m_image_ctx); } - m_image_ctx.data_ctx.aio_flush(); unlock(); } diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc index a3eec40a9959..3bdf232a43e2 100644 --- a/src/librbd/ObjectMap.cc +++ b/src/librbd/ObjectMap.cc @@ -8,6 +8,7 @@ #include "common/errno.h" #include "include/stringify.h" #include "cls/lock/cls_lock_client.h" +#include #define dout_subsys ceph_subsys_rbd 
#undef dout_prefix @@ -20,6 +21,18 @@ ObjectMap::ObjectMap(ImageCtx &image_ctx) { } +std::string ObjectMap::object_map_name(const std::string &image_id, + uint64_t snap_id) { + std::string oid(RBD_OBJECT_MAP_PREFIX + image_id); + if (snap_id != CEPH_NOSNAP) { + std::stringstream snap_suffix; + snap_suffix << "." << std::setfill('0') << std::setw(16) << std::hex + << snap_id; + oid += snap_suffix.str(); + } + return oid; +} + int ObjectMap::lock() { if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { @@ -29,10 +42,10 @@ int ObjectMap::lock() int r; bool broke_lock = false; CephContext *cct = m_image_ctx.cct; + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); while (true) { ldout(cct, 10) << &m_image_ctx << " locking object map" << dendl; - r = rados::cls::lock::lock(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + r = rados::cls::lock::lock(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", "", "", utime_t(), 0); if (r == 0) { @@ -47,8 +60,7 @@ int ObjectMap::lock() lockers_t lockers; ClsLockType lock_type; std::string lock_tag; - int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + int r = rados::cls::lock::get_lock_info(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, &lockers, &lock_type, &lock_tag); if (r == -ENOENT) { @@ -63,8 +75,7 @@ int ObjectMap::lock() for (lockers_t::iterator it = lockers.begin(); it != lockers.end(); ++it) { const rados::cls::lock::locker_id_t &locker = it->first; - r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + r = rados::cls::lock::break_lock(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, locker.cookie, locker.locker); if (r < 0 && r != -ENOENT) { @@ -89,8 +100,8 @@ int ObjectMap::unlock() ldout(m_image_ctx.cct, 10) << &m_image_ctx << " unlocking object map" << dendl; - int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx, - object_map_name(m_image_ctx.id), + std::string oid = object_map_name(m_image_ctx.id, CEPH_NOSNAP); 
+ int r = rados::cls::lock::unlock(&m_image_ctx.md_ctx, oid, RBD_LOCK_NAME, ""); if (r < 0 && r != -ENOENT) { lderr(m_image_ctx.cct) << "failed to release object map lock: " @@ -108,52 +119,112 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const } RWLock::RLocker l(m_image_ctx.object_map_lock); - assert(object_no < object_map.size()); + assert(object_no < m_object_map.size()); - bool exists = (object_map[object_no] == OBJECT_EXISTS || - object_map[object_no] == OBJECT_PENDING); + bool exists = (m_object_map[object_no] == OBJECT_EXISTS || + m_object_map[object_no] == OBJECT_PENDING); ldout(m_image_ctx.cct, 20) << &m_image_ctx << " object_may_exist: " << "object_no=" << object_no << " r=" << exists << dendl; return exists; } -int ObjectMap::refresh() +void ObjectMap::refresh(uint64_t snap_id) { if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { - return 0; + return; } CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << &m_image_ctx << " refreshing object map" << dendl; RWLock::WLocker l(m_image_ctx.object_map_lock); - int r = cls_client::object_map_load(&m_image_ctx.data_ctx, - object_map_name(m_image_ctx.id), - &object_map); + std::string oid(object_map_name(m_image_ctx.id, snap_id)); + int r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, + &m_object_map); if (r < 0) { lderr(cct) << "error refreshing object map: " << cpp_strerror(r) << dendl; invalidate(); - object_map.clear(); - return r; + m_object_map.clear(); + return; } - ldout(cct, 20) << "refreshed object map: " << object_map.size() + ldout(cct, 20) << "refreshed object map: " << m_object_map.size() << dendl; uint64_t num_objs = Striper::get_num_objects( - m_image_ctx.layout, m_image_ctx.get_image_size(m_image_ctx.snap_id)); - if (object_map.size() != num_objs) { + m_image_ctx.layout, m_image_ctx.get_image_size(snap_id)); + if (m_object_map.size() != num_objs) { // resize op might have been interrupted - lderr(cct) << "incorrect object map size: " << object_map.size() + 
lderr(cct) << "incorrect object map size: " << m_object_map.size() << " != " << num_objs << dendl; invalidate(); - return -EINVAL; } - return 0; } +void ObjectMap::rollback(uint64_t snap_id) { + if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { + return; + } + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << &m_image_ctx << " rollback object map" << dendl; + + RWLock::WLocker l(m_image_ctx.object_map_lock); + + std::string snap_oid(object_map_name(m_image_ctx.id, snap_id)); + bufferlist bl; + int r = m_image_ctx.md_ctx.read(snap_oid, bl, 0, 0); + if (r < 0) { + lderr(cct) << "unable to load snapshot object map '" << snap_oid << "': " + << cpp_strerror(r) << dendl; + invalidate(); + return; + } + + librados::ObjectWriteOperation op; + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, "", ""); + op.write_full(bl); + + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + r = m_image_ctx.md_ctx.operate(oid, &op); + if (r < 0) { + lderr(cct) << "unable to rollback object map: " << cpp_strerror(r) + << dendl; + invalidate(); + } +} + +void ObjectMap::snapshot(uint64_t snap_id) { + if ((m_image_ctx.features & RBD_FEATURE_OBJECT_MAP) == 0) { + return; + } + + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << &m_image_ctx << " snapshot object map" << dendl; + + int r; + bufferlist bl; + { + RWLock::RLocker l(m_image_ctx.object_map_lock); + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + r = m_image_ctx.md_ctx.read(oid, bl, 0, 0); + if (r < 0) { + lderr(cct) << "unable to load object map: " << cpp_strerror(r) + << dendl; + invalidate(); + } + } + + std::string snap_oid(object_map_name(m_image_ctx.id, snap_id)); + r = m_image_ctx.md_ctx.write_full(snap_oid, bl); + if (r < 0) { + lderr(cct) << "unable to snapshot object map '" << snap_oid << "': " + << cpp_strerror(r) << dendl; + invalidate(); + } +} void ObjectMap::aio_resize(uint64_t new_size, uint8_t default_object_state, Context *on_finish) { @@ 
-190,15 +261,15 @@ bool ObjectMap::aio_update(uint64_t start_object_no, uint64_t end_object_no, ldout(cct, 20) << &m_image_ctx << " aio_update: start=" << start_object_no << ", end=" << end_object_no << ", new_state=" << static_cast<uint32_t>(new_state) << dendl; - if (end_object_no > object_map.size()) { + if (end_object_no > m_object_map.size()) { ldout(cct, 20) << "skipping update of invalid object map" << dendl; return false; } for (uint64_t object_no = start_object_no; object_no < end_object_no; ++object_no) { - if ((!current_state || object_map[object_no] == *current_state) && - object_map[object_no] != new_state) { + if ((!current_state || m_object_map[object_no] == *current_state) && + m_object_map[object_no] != new_state) { UpdateRequest *req = new UpdateRequest(m_image_ctx, start_object_no, end_object_no, new_state, current_state, on_finish); @@ -307,8 +378,8 @@ void ObjectMap::ResizeRequest::send() { cls_client::object_map_resize(&op, m_num_objs, m_default_object_state); librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id), - rados_completion, &op); + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); assert(r == 0); rados_completion->release(); } @@ -318,10 +389,11 @@ void ObjectMap::ResizeRequest::finish(ObjectMap *object_map) { ldout(cct, 5) << &m_image_ctx << " resizing in-memory object map: " << m_num_objs << dendl; - size_t orig_object_map_size = object_map->object_map.size(); - object_map->object_map.resize(m_num_objs); - for (uint64_t i = orig_object_map_size; i < object_map->object_map.size(); ++i) { - object_map->object_map[i] = m_default_object_state; + size_t orig_object_map_size = object_map->m_object_map.size(); + object_map->m_object_map.resize(m_num_objs); + for (uint64_t i = orig_object_map_size; + i < object_map->m_object_map.size(); ++i) { + object_map->m_object_map[i] = 
m_default_object_state; } } @@ -341,8 +413,8 @@ void ObjectMap::UpdateRequest::send() { m_new_state, m_current_state); librados::AioCompletion *rados_completion = create_callback_completion(); - int r = m_image_ctx.data_ctx.aio_operate(object_map_name(m_image_ctx.id), - rados_completion, &op); + std::string oid(object_map_name(m_image_ctx.id, CEPH_NOSNAP)); + int r = m_image_ctx.md_ctx.aio_operate(oid, rados_completion, &op); assert(r == 0); rados_completion->release(); } @@ -352,11 +424,11 @@ void ObjectMap::UpdateRequest::finish(ObjectMap *object_map) { ldout(cct, 20) << &m_image_ctx << " updating in-memory object map" << dendl; for (uint64_t object_no = m_start_object_no; - object_no < MIN(m_end_object_no, object_map->object_map.size()); + object_no < MIN(m_end_object_no, object_map->m_object_map.size()); ++object_no) { if (!m_current_state || - object_map->object_map[object_no] == *m_current_state) { - object_map->object_map[object_no] = m_new_state; + object_map->m_object_map[object_no] == *m_current_state) { + object_map->m_object_map[object_no] = m_new_state; } } } diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h index 0c476448d3c3..2af1db2f057f 100644 --- a/src/librbd/ObjectMap.h +++ b/src/librbd/ObjectMap.h @@ -24,6 +24,9 @@ public: ObjectMap(ImageCtx &image_ctx); + static std::string object_map_name(const std::string &image_id, + uint64_t snap_id); + int lock(); int unlock(); @@ -39,7 +42,9 @@ public: const boost::optional<uint8_t> &current_state, Context *on_finish); - int refresh(); + void refresh(uint64_t snap_id); + void rollback(uint64_t snap_id); + void snapshot(uint64_t snap_id); private: @@ -113,7 +118,7 @@ private: ImageCtx &m_image_ctx; - ceph::BitVector<2> object_map; + ceph::BitVector<2> m_object_map; void invalidate(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index a34d310828f1..7d6593bb01c1 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -69,11 +69,6 @@ namespace librbd { return image_name + RBD_SUFFIX; } 
- const string object_map_name(const string &image_id) - { - return RBD_OBJECT_MAP_PREFIX + image_id; - } - int detect_format(IoCtx &io_ctx, const string &name, bool *old_format, uint64_t *size) { @@ -315,7 +310,6 @@ namespace librbd { rollback_object(ictx, snap_id, ictx->get_object_name(i), throttle); prog_ctx.update_progress(i * bsize, numseg * bsize); } - rollback_object(ictx, snap_id, object_map_name(ictx->id), throttle); r = throttle.wait_for_ret(); if (r < 0) { @@ -323,6 +317,13 @@ namespace librbd { << cpp_strerror(r) << dendl; return r; } + + { + RWLock::RLocker l(ictx->md_lock); + if (ictx->object_map != NULL) { + ictx->object_map->rollback(snap_id); + } + } return 0; } @@ -478,11 +479,13 @@ namespace librbd { if (r < 0) return r; + bool lock_owner = false; while (ictx->image_watcher->is_lock_supported()) { r = prepare_image_update(ictx); if (r < 0) { return -EROFS; } else if (ictx->image_watcher->is_lock_owner()) { + lock_owner = true; break; } @@ -493,13 +496,19 @@ namespace librbd { ldout(ictx->cct, 5) << "snap_create timed out notifying lock owner" << dendl; } - RWLock::RLocker l2(ictx->md_lock); + RWLock::WLocker l2(ictx->md_lock); + r = _flush(ictx); + if (r < 0) { + return r; + } + do { - r = add_snap(ictx, snap_name); + r = add_snap(ictx, snap_name, lock_owner); } while (r == -ESTALE); - if (r < 0) + if (r < 0) { return r; + } if (notify) { notify_change(ictx->md_ctx, ictx->header_oid, ictx); @@ -566,12 +575,20 @@ namespace librbd { } } + if (ictx->object_map != NULL) { + r = ictx->md_ctx.remove(ObjectMap::object_map_name(ictx->id, snap_id)); + if (r < 0 && r != -ENOENT) { + lderr(ictx->cct) << "snap_remove: failed to remove snapshot object map" + << dendl; + return 0; + } + } + r = rm_snap(ictx, snap_name); if (r < 0) return r; r = ictx->data_ctx.selfmanaged_snap_remove(snap_id); - if (r < 0) return r; @@ -886,7 +903,7 @@ reprotect_and_return_err: librados::ObjectWriteOperation op; cls_client::object_map_resize(&op, 
Striper::get_num_objects(layout, size), OBJECT_NONEXISTENT); - r = io_ctx.operate(object_map_name(id), &op); + r = io_ctx.operate(ObjectMap::object_map_name(id, CEPH_NOSNAP), &op); if (r < 0) { goto err_remove_header; } @@ -1564,7 +1581,7 @@ reprotect_and_return_err: } } if (!old_format) { - r = io_ctx.remove(object_map_name(id)); + r = io_ctx.remove(ObjectMap::object_map_name(id, CEPH_NOSNAP)); if (r < 0 && r != -ENOENT) { lderr(cct) << "error removing image object map" << dendl; } @@ -1726,9 +1743,10 @@ reprotect_and_return_err: } - int add_snap(ImageCtx *ictx, const char *snap_name) + int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner) { assert(ictx->owner_lock.is_locked()); + assert(ictx->md_lock.is_wlocked()); uint64_t snap_id; int r = ictx->md_ctx.selfmanaged_snap_create(&snap_id); @@ -1757,6 +1775,24 @@ reprotect_and_return_err: return r; } + if (!ictx->old_format) { + if (ictx->object_map != NULL) { + ictx->object_map->snapshot(snap_id); + } + if (lock_owner) { + // immediately start using the new snap context if we + // own the exclusive lock + std::vector<snapid_t> snaps; + snaps.push_back(snap_id); + snaps.insert(snaps.end(), ictx->snapc.snaps.begin(), + ictx->snapc.snaps.end()); + + ictx->snapc.seq = snap_id; + ictx->snapc.snaps.swap(snaps); + ictx->data_ctx.selfmanaged_snap_set_write_ctx(ictx->snapc.seq, + ictx->snaps); + } + } return 0; } @@ -2016,7 +2052,7 @@ reprotect_and_return_err: } else { ictx->object_map = new ObjectMap(*ictx); if (ictx->snap_exists) { - ictx->object_map->refresh(); + ictx->object_map->refresh(ictx->snap_id); } } diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 1e6f790b2da6..d164f02ff439 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -76,7 +76,6 @@ namespace librbd { const std::string id_obj_name(const std::string &name); const std::string header_name(const std::string &image_id); const std::string old_header_name(const std::string &image_name); - const std::string object_map_name(const 
std::string &image_id); int detect_format(librados::IoCtx &io_ctx, const std::string &name, bool *old_format, uint64_t *size); @@ -120,7 +119,7 @@ namespace librbd { int snap_unprotect(ImageCtx *ictx, const char *snap_name); int snap_is_protected(ImageCtx *ictx, const char *snap_name, bool *is_protected); - int add_snap(ImageCtx *ictx, const char *snap_name); + int add_snap(ImageCtx *ictx, const char *snap_name, bool lock_owner); int rm_snap(ImageCtx *ictx, const char *snap_name); int refresh_parent(ImageCtx *ictx); int ictx_check(ImageCtx *ictx);