OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature
OPTION(rbd_default_features, OPT_INT, 7) // only applies to format 2 images
// +1 for layering, +2 for stripingv2,
- // +4 for exclusive lock
+ // +4 for exclusive lock, +8 for object map
OPTION(nss_db_path, OPT_STR, "") // path to nss db
#define RBD_FEATURE_LAYERING (1<<0)
#define RBD_FEATURE_STRIPINGV2 (1<<1)
#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2)
+#define RBD_FEATURE_OBJECT_MAP (1<<3)
-#define RBD_FEATURES_INCOMPATIBLE (RBD_FEATURE_LAYERING | \
+#define RBD_FEATURES_INCOMPATIBLE (RBD_FEATURE_LAYERING | \
RBD_FEATURE_STRIPINGV2)
-#define RBD_FEATURES_RW_INCOMPATIBLE (RBD_FEATURES_INCOMPATIBLE | \
- RBD_FEATURE_EXCLUSIVE_LOCK)
+#define RBD_FEATURES_RW_INCOMPATIBLE (RBD_FEATURES_INCOMPATIBLE | \
+ RBD_FEATURE_EXCLUSIVE_LOCK | \
+ RBD_FEATURE_OBJECT_MAP)
-#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \
- RBD_FEATURE_STRIPINGV2 | \
- RBD_FEATURE_EXCLUSIVE_LOCK)
+#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \
+ RBD_FEATURE_STRIPINGV2 | \
+ RBD_FEATURE_EXCLUSIVE_LOCK | \
+ RBD_FEATURE_OBJECT_MAP)
#endif
/* New-style rbd image 'foo' consists of objects
* rbd_id.foo - id of image
* rbd_header.<id> - image metadata
+ * rbd_object_map.<id> - optional image object map
* rbd_data.<id>.00000000
* rbd_data.<id>.00000001
* ... - data
*/
#define RBD_HEADER_PREFIX "rbd_header."
+#define RBD_OBJECT_MAP_PREFIX "rbd_object_map."
#define RBD_DATA_PREFIX "rbd_data."
#define RBD_ID_PREFIX "rbd_id."
snap_lock("librbd::ImageCtx::snap_lock"),
parent_lock("librbd::ImageCtx::parent_lock"),
refresh_lock("librbd::ImageCtx::refresh_lock"),
+ object_map_lock("librbd::ImageCtx::object_map_lock"),
aio_lock("librbd::ImageCtx::aio_lock"),
copyup_list_lock("librbd::ImageCtx::copyup_list_lock"),
copyup_list_cond(),
copyup_list_cond.Wait(copyup_list_lock);
}
}
+
+ /**
+  * Reload the in-memory object map from its backing object.
+  *
+  * Does nothing when the image lacks RBD_FEATURE_OBJECT_MAP.
+  *
+  * @returns 0 on success or when the feature is disabled; the negative
+  *          error from cls_client::object_map_load() if loading fails (the
+  *          in-memory map is cleared in that case); -EINVAL if the loaded
+  *          map's size differs from the image's current object count
+  */
+ int ImageCtx::refresh_object_map()
+ {
+   if (!(features & RBD_FEATURE_OBJECT_MAP)) {
+     return 0;
+   }
+
+   const int load_rc = cls_client::object_map_load(&data_ctx,
+                                                   object_map_name(id),
+                                                   &object_map);
+   if (load_rc < 0) {
+     lderr(cct) << "error refreshing object map: " << cpp_strerror(load_rc)
+                << dendl;
+     // TODO: flag object map as invalid
+     object_map.clear();
+     return load_rc;
+   }
+
+   ldout(cct, 20) << "refreshed object map: " << object_map.size()
+                  << dendl;
+
+   // the map is expected to hold one entry per object of the current size
+   const uint64_t expected_objs =
+     Striper::get_num_objects(layout, get_current_size());
+   if (object_map.size() == expected_objs) {
+     return 0;
+   }
+
+   // resize op might have been interrupted
+   lderr(cct) << "incorrect object map size: " << object_map.size()
+              << " != " << expected_objs << dendl;
+   // TODO: flag object map as invalid
+   return -EINVAL;
+ }
+
+ /**
+  * Resize the object map to match the image's current size.
+  *
+  * The backing object is resized first; if that succeeds the in-memory map
+  * is resized to the same length and every newly added entry is set to
+  * default_object_state. Does nothing when the image lacks
+  * RBD_FEATURE_OBJECT_MAP.
+  *
+  * @param default_object_state state assigned to entries added by the resize
+  * @returns 0 (a failed resize of the backing object is logged, not returned)
+  */
+ int ImageCtx::resize_object_map(uint8_t default_object_state)
+ {
+   if ((features & RBD_FEATURE_OBJECT_MAP) == 0) {
+     return 0;
+   }
+
+   uint64_t num_objs = Striper::get_num_objects(layout, get_current_size());
+   ldout(cct, 20) << "resizing object map: " << num_objs << dendl;
+   librados::ObjectWriteOperation op;
+   cls_client::object_map_resize(&op, num_objs, default_object_state);
+   int r = data_ctx.operate(object_map_name(id), &op);
+   if (r < 0) {
+     // cast so the state is logged as a number; streaming a uint8_t directly
+     // would insert it as a raw character
+     lderr(cct) << "error resizing object map: size=" << num_objs << ", "
+                << "state=" << static_cast<uint32_t>(default_object_state)
+                << ", " << "error=" << cpp_strerror(r) << dendl;
+     // TODO: flag object map as invalid
+     // NOTE(review): the error is swallowed here while refresh_object_map()
+     // and update_object_map() return theirs -- confirm this is intentional
+     return 0;
+   }
+
+   // mirror the resize in memory; only entries beyond the old size are new
+   size_t orig_object_map_size = object_map.size();
+   object_map.resize(num_objs);
+   for (uint64_t i = orig_object_map_size; i < object_map.size(); ++i) {
+     object_map[i] = default_object_state;
+   }
+   return 0;
+ }
+
+ /**
+  * Set the state of a single object in the object map.
+  *
+  * Forwards to the range overload with the half-open range
+  * [object_no, object_no + 1) and no current-state filter.
+  *
+  * @param object_no object number to update
+  * @param object_state state to assign
+  * @returns whatever the range overload returns
+  */
+ int ImageCtx::update_object_map(uint64_t object_no, uint8_t object_state)
+ {
+   const boost::optional<uint8_t> no_state_filter;
+   const uint64_t range_end = object_no + 1;
+   return update_object_map(object_no, range_end, object_state,
+                            no_state_filter);
+ }
+
+ /**
+  * Set the state of every object in [start_object_no, end_object_no).
+  *
+  * The change is written to the object map's backing object first and, only
+  * if that succeeds, mirrored into the in-memory map. No write is issued when
+  * no entry in the range would change. Does nothing when the image lacks
+  * RBD_FEATURE_OBJECT_MAP.
+  *
+  * @param start_object_no first object number to update (inclusive)
+  * @param end_object_no one past the last object number to update
+  * @param new_state state to assign
+  * @param current_state if set, only entries currently in this state are
+  *                      updated; if unset, every entry in range is updated
+  * @returns 0 on success or when nothing needed updating, otherwise the
+  *          negative error returned by the write operation
+  */
+ int ImageCtx::update_object_map(uint64_t start_object_no,
+                                 uint64_t end_object_no, uint8_t new_state,
+                                 const boost::optional<uint8_t> &current_state)
+ {
+   if ((features & RBD_FEATURE_OBJECT_MAP) == 0) {
+     return 0;
+   }
+
+   assert(start_object_no <= end_object_no);
+   assert(/* flagged as invalid || */ end_object_no <= object_map.size());
+   if (end_object_no > object_map.size()) {
+     ldout(cct, 20) << "skipping update of invalid object map" << dendl;
+     return 0;
+   }
+
+   // skip the write entirely unless at least one matching entry would change
+   bool update_required = false;
+   for (uint64_t object_no = start_object_no; object_no < end_object_no;
+        ++object_no) {
+     if ((!current_state || object_map[object_no] == *current_state) &&
+         object_map[object_no] != new_state) {
+       update_required = true;
+       break;
+     }
+   }
+
+   if (!update_required) {
+     return 0;
+   }
+
+   ldout(cct, 20) << "updating object map: [" << start_object_no << ","
+                  << end_object_no << ") = "
+                  << static_cast<uint32_t>(new_state) << dendl;
+
+   librados::ObjectWriteOperation op;
+   cls_client::object_map_update(&op, start_object_no, end_object_no,
+                                 new_state, current_state);
+   int r = data_ctx.operate(object_map_name(id), &op);
+   if (r < 0) {
+     lderr(cct) << "object map update failed: " << cpp_strerror(r) << dendl;
+     // TODO: disable object map
+   } else {
+     // apply the same filtered update to the in-memory copy
+     for (uint64_t object_no = start_object_no; object_no < end_object_no;
+          ++object_no) {
+       if (!current_state || object_map[object_no] == *current_state) {
+         object_map[object_no] = new_state;
+       }
+     }
+   }
+   return r;
+ }
}
#include <set>
#include <string>
#include <vector>
+#include <boost/optional.hpp>
#include "common/Cond.h"
#include "common/Mutex.h"
#include "common/Readahead.h"
#include "common/RWLock.h"
+#include "common/bit_vector.hpp"
#include "common/snap_types.h"
#include "include/buffer.h"
#include "include/rbd/librbd.hpp"
/**
* Lock ordering:
* owner_lock, md_lock, cache_lock, snap_lock, parent_lock, refresh_lock,
- * aio_lock
+ * object_map_lock, aio_lock
*/
RWLock owner_lock; // protects exclusive lock leadership updates
RWLock md_lock; // protects access to the mutable image metadata that
RWLock snap_lock; // protects snapshot-related member variables:
RWLock parent_lock; // protects parent_md and parent
Mutex refresh_lock; // protects refresh_seq and last_refresh
+ RWLock object_map_lock; // protects object map updates
Mutex aio_lock; // protects pending_aio and pending_aio_cond
Mutex copyup_list_lock; // protects copyup_waiting_list
Cond pending_aio_cond;
uint64_t pending_aio;
+ ceph::BitVector<2> object_map;
+
/**
* Either image_name or image_id must be set.
* If id is not known, pass the empty std::string,
uint64_t overlap);
void wait_for_pending_aio();
void wait_for_pending_copyup();
+
+ // Object map maintenance; each returns 0 without doing anything when the
+ // image lacks RBD_FEATURE_OBJECT_MAP.
+
+ // reload the in-memory object map from its backing object
+ int refresh_object_map();
+ // resize the object map to the image's current object count; entries added
+ // by the resize are set to default_object_state
+ int resize_object_map(uint8_t default_object_state);
+ // set the state of a single object
+ int update_object_map(uint64_t object_no, uint8_t object_state);
+ // set the state of objects in [start_object_no, end_object_no); when
+ // current_state is set, only entries currently in that state are changed
+ int update_object_map(uint64_t start_object_no, uint64_t end_object_no,
+                       uint8_t new_state,
+                       const boost::optional<uint8_t> &current_state);
};
}
return image_name + RBD_SUFFIX;
}
+ // Build the name of the object map object for an image id:
+ // RBD_OBJECT_MAP_PREFIX followed by the id.
+ const string object_map_name(const string &image_id)
+ {
+   string oid(RBD_OBJECT_MAP_PREFIX);
+   oid.append(image_id);
+   return oid;
+ }
+
int detect_format(IoCtx &io_ctx, const string &name,
bool *old_format, uint64_t *size)
{
}
};
+ static const uint8_t OBJECT_NONEXISTENT = 0; // NOTE(review): presumably a per-object state stored in the image object map -- confirm against callers
+ static const uint8_t OBJECT_EXISTS = 1; // NOTE(review): presumably the state for an object that is present -- confirm
+ static const uint8_t OBJECT_PENDING = 2; // NOTE(review): what "pending" covers is not visible here -- confirm
+
const std::string id_obj_name(const std::string &name);
const std::string header_name(const std::string &image_id);
const std::string old_header_name(const std::string &image_name);
+ const std::string object_map_name(const std::string &image_id);
int detect_format(librados::IoCtx &io_ctx, const std::string &name,
bool *old_format, uint64_t *size);
return "striping";
case RBD_FEATURE_EXCLUSIVE_LOCK:
return "exclusive";
+ case RBD_FEATURE_OBJECT_MAP:
+ return "object map";
default:
return "";
}
{
string s = "";
- for (uint64_t feature = 1; feature <= RBD_FEATURE_EXCLUSIVE_LOCK;
+ for (uint64_t feature = 1; feature <= RBD_FEATURE_OBJECT_MAP;
feature <<= 1) {
if (feature & features) {
if (s.size())
static void format_features(Formatter *f, uint64_t features)
{
f->open_array_section("features");
- for (uint64_t feature = 1; feature <= RBD_FEATURE_EXCLUSIVE_LOCK;
+ for (uint64_t feature = 1; feature <= RBD_FEATURE_OBJECT_MAP;
feature <<= 1) {
f->dump_string("feature", feature_str(feature));
}
r = rbd.create(io_ctx, imgname, size, order);
} else {
if (features == 0) {
- features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK;
+ features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK |
+ RBD_FEATURE_OBJECT_MAP;
}
if ((stripe_unit || stripe_count) &&
(stripe_unit != (1ull << *order) && stripe_count != 1)) {
bool format_specified = false,
output_format_specified = false;
int format = 1;
- uint64_t features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK;
+ uint64_t features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK |
+ RBD_FEATURE_OBJECT_MAP;
const char *imgname = NULL, *snapname = NULL, *destname = NULL,
*dest_poolname = NULL, *dest_snapname = NULL, *path = NULL,
*devpath = NULL, *lock_cookie = NULL, *lock_client = NULL,
} else if (ceph_argparse_flag(args, i , "--allow-shrink", (char *)NULL)) {
resize_allow_shrink = true;
} else if (ceph_argparse_flag(args, i, "--image-shared", (char *)NULL)) {
- features &= ~RBD_FEATURE_EXCLUSIVE_LOCK;
+ features &= ~(RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_OBJECT_MAP);
} else if (ceph_argparse_witharg(args, i, &val, "--format", (char *) NULL)) {
long long ret = strict_strtoll(val.c_str(), 10, &parse_err);
if (parse_err.empty()) {