From 7e751ce9125cbeb04b9551afff4b91c6255c03f9 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 10 Nov 2014 10:25:13 -0500 Subject: [PATCH] librbd: Implement object map for tracking in-use objects The object map is stored in a new 'rbd_object_map.' object in the image's pool. The object map tracks blocks that are in-use, non-existent, and are pending deletion. Signed-off-by: Jason Dillaman --- src/common/config_opts.h | 2 +- src/include/rbd/features.h | 15 +++-- src/include/rbd_types.h | 2 + src/librbd/ImageCtx.cc | 115 +++++++++++++++++++++++++++++++++++++ src/librbd/ImageCtx.h | 14 ++++- src/librbd/internal.cc | 5 ++ src/librbd/internal.h | 5 ++ src/rbd.cc | 14 +++-- 8 files changed, 159 insertions(+), 13 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 378df88157601..d7f53cebf24c7 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -878,7 +878,7 @@ OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 fea OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature OPTION(rbd_default_features, OPT_INT, 7) // only applies to format 2 images // +1 for layering, +2 for stripingv2, - // +4 for exclusive lock + // +4 for exclusive lock, +8 for object map OPTION(nss_db_path, OPT_STR, "") // path to nss db diff --git a/src/include/rbd/features.h b/src/include/rbd/features.h index 7e35baac348e8..1d170679d49dd 100644 --- a/src/include/rbd/features.h +++ b/src/include/rbd/features.h @@ -4,15 +4,18 @@ #define RBD_FEATURE_LAYERING (1<<0) #define RBD_FEATURE_STRIPINGV2 (1<<1) #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) +#define RBD_FEATURE_OBJECT_MAP (1<<3) -#define RBD_FEATURES_INCOMPATIBLE (RBD_FEATURE_LAYERING | \ +#define RBD_FEATURES_INCOMPATIBLE (RBD_FEATURE_LAYERING | \ RBD_FEATURE_STRIPINGV2) -#define RBD_FEATURES_RW_INCOMPATIBLE (RBD_FEATURES_INCOMPATIBLE | \ - RBD_FEATURE_EXCLUSIVE_LOCK) +#define RBD_FEATURES_RW_INCOMPATIBLE 
(RBD_FEATURES_INCOMPATIBLE | \ + RBD_FEATURE_EXCLUSIVE_LOCK | \ + RBD_FEATURE_OBJECT_MAP) -#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ - RBD_FEATURE_STRIPINGV2 | \ - RBD_FEATURE_EXCLUSIVE_LOCK) +#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ + RBD_FEATURE_STRIPINGV2 | \ + RBD_FEATURE_EXCLUSIVE_LOCK | \ + RBD_FEATURE_OBJECT_MAP) #endif diff --git a/src/include/rbd_types.h b/src/include/rbd_types.h index 558bbaada8b19..ad1c1b94502cf 100644 --- a/src/include/rbd_types.h +++ b/src/include/rbd_types.h @@ -18,12 +18,14 @@ /* New-style rbd image 'foo' consists of objects * rbd_id.foo - id of image * rbd_header.<id> - image metadata + * rbd_object_map.<id> - optional image object map * rbd_data.<id>.00000000 * rbd_data.<id>.00000001 * ... - data */ #define RBD_HEADER_PREFIX "rbd_header." +#define RBD_OBJECT_MAP_PREFIX "rbd_object_map." #define RBD_DATA_PREFIX "rbd_data." #define RBD_ID_PREFIX "rbd_id." diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index ddc5cfe524fb1..53a37ae827cc2 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -46,6 +46,7 @@ namespace librbd { snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + object_map_lock("librbd::ImageCtx::object_map_lock"), aio_lock("librbd::ImageCtx::aio_lock"), copyup_list_lock("librbd::ImageCtx::copyup_list_lock"), copyup_list_cond(), @@ -682,4 +683,118 @@ namespace librbd { copyup_list_cond.Wait(copyup_list_lock); } } + + int ImageCtx::refresh_object_map() + { + if ((features & RBD_FEATURE_OBJECT_MAP) == 0) { + return 0; + } + + int r = cls_client::object_map_load(&data_ctx, object_map_name(id), + &object_map); + if (r < 0) { + lderr(cct) << "error refreshing object map: " << cpp_strerror(r) + << dendl; + // TODO: flag object map as invalid + object_map.clear(); + return r; + } + + ldout(cct, 20) << "refreshed object map: " << object_map.size() + << dendl; + + uint64_t num_objs = Striper::get_num_objects(layout, 
get_current_size()); + if (object_map.size() != num_objs) { + // resize op might have been interrupted + lderr(cct) << "incorrect object map size: " << object_map.size() + << " != " << num_objs << dendl; + // TODO: flag object map as invalid + return -EINVAL; + } + return 0; + } + + int ImageCtx::resize_object_map(uint8_t default_object_state) + { + if ((features & RBD_FEATURE_OBJECT_MAP) == 0) { + return 0; + } + + uint64_t num_objs = Striper::get_num_objects(layout, get_current_size()); + ldout(cct, 20) << "resizing object map: " << num_objs << dendl; + librados::ObjectWriteOperation op; + cls_client::object_map_resize(&op, num_objs, default_object_state); + int r = data_ctx.operate(object_map_name(id), &op); + if (r < 0) { + lderr(cct) << "error resizing object map: size=" << num_objs << ", " + << "state=" << default_object_state << ", " + << "error=" << cpp_strerror(r) << dendl; + // TODO: flag object map as invalid + return 0; + } + + size_t orig_object_map_size = object_map.size(); + object_map.resize(num_objs); + for (uint64_t i = orig_object_map_size; i < object_map.size(); ++i) { + object_map[i] = default_object_state; + } + return 0; + } + + int ImageCtx::update_object_map(uint64_t object_no, uint8_t object_state) + { + return update_object_map(object_no, object_no + 1, object_state, + boost::optional<uint8_t>()); + } + + int ImageCtx::update_object_map(uint64_t start_object_no, + uint64_t end_object_no, uint8_t new_state, + const boost::optional<uint8_t> &current_state) + { + if ((features & RBD_FEATURE_OBJECT_MAP) == 0) { + return 0; + } + + assert(start_object_no <= end_object_no); + assert(/* flagged as invalid || */ end_object_no <= object_map.size()); + if (end_object_no > object_map.size()) { + ldout(cct, 20) << "skipping update of invalid object map" << dendl; + return 0; + } + + bool update_required = false; + for (uint64_t object_no = start_object_no; object_no < end_object_no; + ++object_no) { + if ((!current_state || object_map[object_no] == *current_state) && + 
object_map[object_no] != new_state) { + update_required = true; + break; + } + } + + if (!update_required) { + return 0; + } + + ldout(cct, 20) << "updating object map: [" << start_object_no << "," + << end_object_no << ") = " + << static_cast<uint32_t>(new_state) << dendl; + + librados::ObjectWriteOperation op; + cls_client::object_map_update(&op, start_object_no, end_object_no, + new_state, current_state); + int r = data_ctx.operate(object_map_name(id), &op); + if (r < 0) { + lderr(cct) << "object map update failed: " << cpp_strerror(r) << dendl; + // TODO: disable object map + } else { + for (uint64_t object_no = start_object_no; object_no < end_object_no; + ++object_no) { + if (!current_state || object_map[object_no] == *current_state) { + object_map[object_no] = new_state; + } + } + } + return r; + } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 41dfd33980051..e6c741f14c80b 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -9,11 +9,13 @@ #include #include #include +#include <boost/optional.hpp> #include "common/Cond.h" #include "common/Mutex.h" #include "common/Readahead.h" #include "common/RWLock.h" +#include "common/bit_vector.hpp" #include "common/snap_types.h" #include "include/buffer.h" #include "include/rbd/librbd.hpp" @@ -65,7 +67,7 @@ namespace librbd { /** * Lock ordering: * owner_lock, md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, - * aio_lock + * object_map_lock, aio_lock */ RWLock owner_lock; // protects exclusive lock leadership updates RWLock md_lock; // protects access to the mutable image metadata that @@ -75,6 +77,7 @@ namespace librbd { RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + RWLock object_map_lock; // protects object map updates Mutex aio_lock; // protects pending_aio and pending_aio_cond Mutex copyup_list_lock; // protects copyup_waiting_list @@ -109,6 +112,8 @@ namespace librbd { 
Cond pending_aio_cond; uint64_t pending_aio; + ceph::BitVector<2> object_map; + /** * Either image_name or image_id must be set. * If id is not known, pass the empty std::string, @@ -174,6 +179,13 @@ namespace librbd { uint64_t overlap); void wait_for_pending_aio(); void wait_for_pending_copyup(); + + int refresh_object_map(); + int resize_object_map(uint8_t default_object_state); + int update_object_map(uint64_t object_no, uint8_t object_state); + int update_object_map(uint64_t start_object_no, uint64_t end_object_no, + uint8_t new_state, + const boost::optional<uint8_t> &current_state); }; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 7c51b5c36580a..50b99f26022f7 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -68,6 +68,11 @@ namespace librbd { return image_name + RBD_SUFFIX; } + const string object_map_name(const string &image_id) + { + return RBD_OBJECT_MAP_PREFIX + image_id; + } + int detect_format(IoCtx &io_ctx, const string &name, bool *old_format, uint64_t *size) { diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 1578f00ccbdc7..af0bbb5dbc358 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -72,9 +72,14 @@ namespace librbd { } }; + static const uint8_t OBJECT_NONEXISTENT = 0; + static const uint8_t OBJECT_EXISTS = 1; + static const uint8_t OBJECT_PENDING = 2; + const std::string id_obj_name(const std::string &name); const std::string header_name(const std::string &image_id); const std::string old_header_name(const std::string &image_name); + const std::string object_map_name(const std::string &image_id); int detect_format(librados::IoCtx &io_ctx, const std::string &name, bool *old_format, uint64_t *size); diff --git a/src/rbd.cc b/src/rbd.cc index 40f158593a9a9..da35e8c2e7bdb 100644 --- a/src/rbd.cc +++ b/src/rbd.cc @@ -174,6 +174,8 @@ static string feature_str(uint64_t feature) return "striping"; case RBD_FEATURE_EXCLUSIVE_LOCK: return "exclusive"; + case RBD_FEATURE_OBJECT_MAP: + return "object 
map"; default: return ""; } @@ -183,7 +185,7 @@ static string features_str(uint64_t features) { string s = ""; - for (uint64_t feature = 1; feature <= RBD_FEATURE_EXCLUSIVE_LOCK; + for (uint64_t feature = 1; feature <= RBD_FEATURE_OBJECT_MAP; feature <<= 1) { if (feature & features) { if (s.size()) @@ -197,7 +199,7 @@ static string features_str(uint64_t features) static void format_features(Formatter *f, uint64_t features) { f->open_array_section("features"); - for (uint64_t feature = 1; feature <= RBD_FEATURE_EXCLUSIVE_LOCK; + for (uint64_t feature = 1; feature <= RBD_FEATURE_OBJECT_MAP; feature <<= 1) { f->dump_string("feature", feature_str(feature)); } @@ -437,7 +439,8 @@ static int do_create(librbd::RBD &rbd, librados::IoCtx& io_ctx, r = rbd.create(io_ctx, imgname, size, order); } else { if (features == 0) { - features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK; + features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK | + RBD_FEATURE_OBJECT_MAP; } if ((stripe_unit || stripe_count) && (stripe_unit != (1ull << *order) && stripe_count != 1)) { @@ -2469,7 +2472,8 @@ int main(int argc, const char **argv) bool format_specified = false, output_format_specified = false; int format = 1; - uint64_t features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK; + uint64_t features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK | + RBD_FEATURE_OBJECT_MAP; const char *imgname = NULL, *snapname = NULL, *destname = NULL, *dest_poolname = NULL, *dest_snapname = NULL, *path = NULL, *devpath = NULL, *lock_cookie = NULL, *lock_client = NULL, @@ -2573,7 +2577,7 @@ int main(int argc, const char **argv) } else if (ceph_argparse_flag(args, i , "--allow-shrink", (char *)NULL)) { resize_allow_shrink = true; } else if (ceph_argparse_flag(args, i, "--image-shared", (char *)NULL)) { - features &= ~RBD_FEATURE_EXCLUSIVE_LOCK; + features &= ~(RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_OBJECT_MAP); } else if (ceph_argparse_witharg(args, i, &val, "--format", (char *) NULL)) { 
long long ret = strict_strtoll(val.c_str(), 10, &parse_err); if (parse_err.empty()) { -- 2.39.5