OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility
OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated
OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy
+OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release the exclusive lock until it is explicitly requested, i.e. before we know whether the librbd user is using the lock API
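This option selects which policy ImageCtx installs: when true (the default) the AutomaticPolicy keeps today's transparent lock hand-off; when false, or once lock_acquire() has been called on the image, the StandardPolicy takes over and queued writes from a non-owner fail with -EROFS instead of requesting the lock. A minimal, hedged sketch of overriding the option for a single client before connecting — the helper name is hypothetical; Rados::init()/conf_set()/connect() are the standard librados calls:

// Hedged sketch: disable the automatic policy for one client by overriding
// the new option before connecting. The helper name is hypothetical.
#include <rados/librados.hpp>

int connect_without_auto_lock(librados::Rados &cluster) {
  int r = cluster.init(nullptr);      // nullptr -> default client id
  if (r < 0) {
    return r;
  }
  cluster.conf_read_file(nullptr);    // read the default ceph.conf, if any
  cluster.conf_set("rbd_auto_exclusive_lock_until_manual_request", "false");
  return cluster.connect();
}

The same override can equally live in ceph.conf; either way, images opened by this client start out with the StandardPolicy (see the ImageCtx hunk below).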
/*
* The following options change the behavior for librbd's image creation methods that
bool up;
} rbd_mirror_image_status_t;
+typedef enum {
+ RBD_LOCK_MODE_EXCLUSIVE = 0,
+ RBD_LOCK_MODE_SHARED = 1,
+} rbd_lock_mode_t;
+
CEPH_RBD_API void rbd_version(int *major, int *minor, int *extra);
/* image options */
/* exclusive lock feature */
CEPH_RBD_API int rbd_is_exclusive_lock_owner(rbd_image_t image, int *is_owner);
+CEPH_RBD_API int rbd_lock_acquire(rbd_image_t image, rbd_lock_mode_t lock_mode);
+CEPH_RBD_API int rbd_lock_release(rbd_image_t image);
/* object map feature */
CEPH_RBD_API int rbd_rebuild_object_map(rbd_image_t image,
/* exclusive lock feature */
int is_exclusive_lock_owner(bool *is_owner);
+ int lock_acquire(rbd_lock_mode_t lock_mode);
+ int lock_release();
/* object map feature */
int rebuild_object_map(ProgressContext &prog_ctx);
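Since the header hunks above only declare the new entry points, here is a minimal, hedged usage sketch of the C++ variants; the helper name and surrounding setup are illustrative, not part of this change:

// Hypothetical usage of the new Image::lock_acquire()/lock_release() methods;
// image setup and error handling are illustrative only.
#include <rados/librados.hpp>
#include <rbd/librbd.hpp>

int demo_explicit_lock(librados::IoCtx &io_ctx, const char *image_name) {
  librbd::RBD rbd;
  librbd::Image image;
  int r = rbd.open(io_ctx, image, image_name);
  if (r < 0) {
    return r;
  }

  // explicitly take ownership; from this point on the image uses the
  // standard policy, so the lock is no longer transferred automatically
  r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
  if (r < 0) {
    return r;
  }

  bool is_owner = false;
  image.is_exclusive_lock_owner(&is_owner);  // expected to report ownership

  // ... perform I/O while holding the lock ...

  return image.lock_release();
}

Note that RBD_LOCK_MODE_SHARED is declared but not yet supported: lock_acquire() currently returns -EOPNOTSUPP for anything other than RBD_LOCK_MODE_EXCLUSIVE (see the internal.cc hunk below).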
#include "librbd/AioCompletion.h"
#include "librbd/AioImageRequest.h"
#include "librbd/ExclusiveLock.h"
+#include "librbd/exclusive_lock/Policy.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/internal.h"
assert(m_image_ctx.owner_lock.is_locked());
bool write_op = req->is_write_op();
+ bool lock_required = (write_op && is_lock_required()) ||
+ (!write_op && m_require_lock_on_read);
+
+  if (lock_required &&
+      !m_image_ctx.get_exclusive_lock_policy()->may_auto_request_lock()) {
+ lderr(cct) << "op requires exclusive lock" << dendl;
+ req->fail(-EROFS);
+ delete req;
+ finish_in_flight_op();
+ return;
+ }
+
if (write_op) {
m_queued_writes.inc();
} else {
ThreadPool::PointerWQ<AioImageRequest<> >::queue(req);
- if ((write_op && is_lock_required()) ||
- (!write_op && m_require_lock_on_read)) {
+ if (lock_required) {
m_image_ctx.exclusive_lock->request_lock(nullptr);
}
}
Operations.cc
Utils.cc
exclusive_lock/AcquireRequest.cc
+ exclusive_lock/AutomaticPolicy.cc
exclusive_lock/ReacquireRequest.cc
exclusive_lock/ReleaseRequest.cc
exclusive_lock/StandardPolicy.cc
#include "librbd/AsyncOperation.h"
#include "librbd/AsyncRequest.h"
#include "librbd/ExclusiveLock.h"
+#include "librbd/exclusive_lock/AutomaticPolicy.h"
#include "librbd/exclusive_lock/StandardPolicy.h"
#include "librbd/internal.h"
#include "librbd/ImageCtx.h"
cct->_conf->rbd_op_thread_timeout,
thread_pool_singleton);
- exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
+ if (cct->_conf->rbd_auto_exclusive_lock_until_manual_request) {
+ exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
+ } else {
+ exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
+ }
journal_policy = new journal::StandardPolicy(this);
}
int r = 0;
bool accept_request = m_image_ctx.exclusive_lock->accept_requests(&r);
- // need to send something back so the client can detect a missing leader
- ::encode(ResponseMessage(r), ack_ctx->out);
-
if (accept_request) {
+ assert(r == 0);
Mutex::Locker owner_client_id_locker(m_owner_client_id_lock);
if (!m_owner_client_id.is_valid()) {
return true;
ldout(m_image_ctx.cct, 10) << this << " queuing release of exclusive lock"
<< dendl;
- m_image_ctx.get_exclusive_lock_policy()->lock_requested(payload.force);
+ r = m_image_ctx.get_exclusive_lock_policy()->lock_requested(
+ payload.force);
}
+ ::encode(ResponseMessage(r), ack_ctx->out);
}
return true;
}
librbd/Operations.cc \
librbd/Utils.cc \
librbd/exclusive_lock/AcquireRequest.cc \
+ librbd/exclusive_lock/AutomaticPolicy.cc \
librbd/exclusive_lock/ReacquireRequest.cc \
librbd/exclusive_lock/ReleaseRequest.cc \
librbd/exclusive_lock/StandardPolicy.cc \
librbd/Utils.h \
librbd/WatchNotifyTypes.h \
librbd/exclusive_lock/AcquireRequest.h \
+ librbd/exclusive_lock/AutomaticPolicy.h \
librbd/exclusive_lock/Policy.h \
librbd/exclusive_lock/ReacquireRequest.h \
librbd/exclusive_lock/ReleaseRequest.h \
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/AutomaticPolicy.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ExclusiveLock.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::AutomaticPolicy "
+
+namespace librbd {
+namespace exclusive_lock {
+
+int AutomaticPolicy::lock_requested(bool force) {
+ assert(m_image_ctx->owner_lock.is_locked());
+ assert(m_image_ctx->exclusive_lock != nullptr);
+
+ ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+ << dendl;
+
+ // release the lock upon request (ignore forced requests)
+ m_image_ctx->exclusive_lock->release_lock(nullptr);
+ return 0;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_AUTOMATIC_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_AUTOMATIC_POLICY_H
+
+#include "librbd/exclusive_lock/Policy.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+class AutomaticPolicy : public Policy {
+public:
+ AutomaticPolicy(ImageCtx *image_ctx) : m_image_ctx(image_ctx) {
+ }
+
+ virtual bool may_auto_request_lock() {
+ return true;
+ }
+
+ virtual int lock_requested(bool force);
+
+private:
+ ImageCtx *m_image_ctx;
+
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_AUTOMATIC_POLICY_H
virtual ~Policy() {
}
- virtual void lock_requested(bool force) = 0;
+ virtual bool may_auto_request_lock() = 0;
+ virtual int lock_requested(bool force) = 0;
};
} // namespace exclusive_lock
#include "librbd/ImageCtx.h"
#include "librbd/ExclusiveLock.h"
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::StandardPolicy "
+
namespace librbd {
namespace exclusive_lock {
-void StandardPolicy::lock_requested(bool force) {
+int StandardPolicy::lock_requested(bool force) {
assert(m_image_ctx->owner_lock.is_locked());
assert(m_image_ctx->exclusive_lock != nullptr);
- // release the lock upon request (ignore forced requests)
- m_image_ctx->exclusive_lock->release_lock(nullptr);
+ ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+ << dendl;
+
+ return -EROFS;
}
} // namespace exclusive_lock
namespace exclusive_lock {
-class StandardPolicy : public Policy{
+class StandardPolicy : public Policy {
public:
StandardPolicy(ImageCtx *image_ctx) : m_image_ctx(image_ctx) {
}
- virtual void lock_requested(bool force);
+ virtual bool may_auto_request_lock() {
+ return false;
+ }
+
+ virtual int lock_requested(bool force);
private:
ImageCtx *m_image_ctx;
#include "librbd/Operations.h"
#include "librbd/parent_types.h"
#include "librbd/Utils.h"
+#include "librbd/exclusive_lock/AutomaticPolicy.h"
+#include "librbd/exclusive_lock/StandardPolicy.h"
#include "librbd/operation/TrimRequest.h"
#include "include/util.h"
return 0;
}
+ int lock_acquire(ImageCtx *ictx, rbd_lock_mode_t lock_mode)
+ {
+ if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
+ return -EOPNOTSUPP;
+ }
+
+ CephContext *cct = ictx->cct;
+ C_SaferCond lock_ctx;
+ {
+ RWLock::WLocker l(ictx->owner_lock);
+
+ if (ictx->exclusive_lock == nullptr) {
+ lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
+ return -EINVAL;
+ }
+
+ if (ictx->get_exclusive_lock_policy()->may_auto_request_lock()) {
+ ictx->set_exclusive_lock_policy(
+ new exclusive_lock::StandardPolicy(ictx));
+ }
+
+ if (ictx->exclusive_lock->is_lock_owner()) {
+ return 0;
+ }
+
+ ictx->exclusive_lock->request_lock(&lock_ctx);
+ }
+
+ int r = lock_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+
+ RWLock::RLocker l(ictx->owner_lock);
+
+ if (ictx->exclusive_lock == nullptr ||
+ !ictx->exclusive_lock->is_lock_owner()) {
+ lderr(cct) << "failed to acquire exclusive lock" << dendl;
+ return -EROFS;
+ }
+
+ return 0;
+ }
+
+ int lock_release(ImageCtx *ictx)
+ {
+ CephContext *cct = ictx->cct;
+ C_SaferCond lock_ctx;
+ {
+ RWLock::WLocker l(ictx->owner_lock);
+
+ if (ictx->exclusive_lock == nullptr ||
+ !ictx->exclusive_lock->is_lock_owner()) {
+ lderr(cct) << "not exclusive lock owner" << dendl;
+ return -EINVAL;
+ }
+
+ ictx->exclusive_lock->release_lock(&lock_ctx);
+ }
+
+ int r = lock_ctx.wait();
+ if (r < 0) {
+ lderr(cct) << "failed to release exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ return r;
+ }
+ return 0;
+ }
+
int remove(IoCtx& io_ctx, const std::string &image_name,
const std::string &image_id, ProgressContext& prog_ctx,
bool force)
ictx->exclusive_lock->shut_down(&ctx);
r = ctx.wait();
if (r < 0) {
- lderr(cct) << "error shutting down exclusive lock"
+ lderr(cct) << "error shutting down exclusive lock: "
<< cpp_strerror(r) << dendl;
ictx->state->close();
return r;
int get_flags(ImageCtx *ictx, uint64_t *flags);
int set_image_notification(ImageCtx *ictx, int fd, int type);
int is_exclusive_lock_owner(ImageCtx *ictx, bool *is_owner);
+ int lock_acquire(ImageCtx *ictx, rbd_lock_mode_t lock_mode);
+ int lock_release(ImageCtx *ictx);
int remove(librados::IoCtx& io_ctx, const std::string &image_name,
const std::string &image_id, ProgressContext& prog_ctx,
return r;
}
+ int Image::lock_acquire(rbd_lock_mode_t lock_mode)
+ {
+ ImageCtx *ictx = (ImageCtx *)ctx;
+ tracepoint(librbd, lock_acquire_enter, ictx, lock_mode);
+ int r = librbd::lock_acquire(ictx, lock_mode);
+ tracepoint(librbd, lock_acquire_exit, ictx, r);
+ return r;
+ }
+
+ int Image::lock_release()
+ {
+ ImageCtx *ictx = (ImageCtx *)ctx;
+ tracepoint(librbd, lock_release_enter, ictx);
+ int r = librbd::lock_release(ictx);
+ tracepoint(librbd, lock_release_exit, ictx, r);
+ return r;
+ }
+
int Image::rebuild_object_map(ProgressContext &prog_ctx)
{
ImageCtx *ictx = reinterpret_cast<ImageCtx*>(ctx);
return r;
}
+extern "C" int rbd_lock_acquire(rbd_image_t image, rbd_lock_mode_t lock_mode)
+{
+ librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+ tracepoint(librbd, lock_acquire_enter, ictx, lock_mode);
+ int r = librbd::lock_acquire(ictx, lock_mode);
+ tracepoint(librbd, lock_acquire_exit, ictx, r);
+ return r;
+}
+
+extern "C" int rbd_lock_release(rbd_image_t image)
+{
+ librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+ tracepoint(librbd, lock_release_enter, ictx);
+ int r = librbd::lock_release(ictx);
+ tracepoint(librbd, lock_release_exit, ictx, r);
+ return r;
+}
+
extern "C" int rbd_rebuild_object_map(rbd_image_t image,
librbd_progress_fn_t cb, void *cbdata)
{
ASSERT_TRUE(read_bl.is_zero());
read_comp->release();
}
+
+TEST_F(TestLibRBD, ExclusiveLock)
+{
+ REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+ static char buf[10];
+
+ rados_ioctx_t ioctx;
+ rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+
+ std::string name = get_temp_image_name();
+ uint64_t size = 2 << 20;
+ int order = 0;
+ ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+ rbd_image_t image1;
+ ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image1, NULL));
+
+ int lock_owner;
+ ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+ ASSERT_TRUE(lock_owner);
+
+ rbd_image_t image2;
+ ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
+
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+ ASSERT_FALSE(lock_owner);
+
+ ASSERT_EQ(0, rbd_lock_release(image1));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+ ASSERT_FALSE(lock_owner);
+
+ ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+ ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
+ ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+ ASSERT_TRUE(lock_owner);
+
+ ASSERT_EQ(0, rbd_lock_release(image2));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+ ASSERT_FALSE(lock_owner);
+
+ ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+ ASSERT_TRUE(lock_owner);
+
+ ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+ ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+ ASSERT_EQ(0, rbd_lock_release(image1));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+ ASSERT_FALSE(lock_owner);
+
+ int owner_id = -1;
+ Mutex lock("ping-pong");
+ class PingPong : public Thread {
+ public:
+ explicit PingPong(int id, rbd_image_t &image, int &owner_id, Mutex &lock)
+ : m_id(id), m_image(image), m_owner_id(owner_id), m_lock(lock) {
+    }
+
+ protected:
+ void *entry() {
+ for (int i = 0; i < 10; i++) {
+ {
+ Mutex::Locker locker(m_lock);
+ if (m_owner_id == m_id) {
+ std::cout << m_id << ": releasing exclusive lock" << std::endl;
+ EXPECT_EQ(0, rbd_lock_release(m_image));
+ int lock_owner;
+ EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+ EXPECT_FALSE(lock_owner);
+ m_owner_id = -1;
+ std::cout << m_id << ": exclusive lock released" << std::endl;
+ continue;
+ }
+ }
+
+ std::cout << m_id << ": acquiring exclusive lock" << std::endl;
+ EXPECT_EQ(0, rbd_lock_acquire(m_image, RBD_LOCK_MODE_EXCLUSIVE));
+ int lock_owner;
+ EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+ EXPECT_TRUE(lock_owner);
+ std::cout << m_id << ": exclusive lock acquired" << std::endl;
+ {
+ Mutex::Locker locker(m_lock);
+ m_owner_id = m_id;
+ }
+ usleep(rand() % 50000);
+ }
+
+ Mutex::Locker locker(m_lock);
+ if (m_owner_id == m_id) {
+ EXPECT_EQ(0, rbd_lock_release(m_image));
+ int lock_owner;
+ EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+ EXPECT_FALSE(lock_owner);
+ m_owner_id = -1;
+ }
+
+ return NULL;
+ }
+
+ private:
+ int m_id;
+ rbd_image_t &m_image;
+ int &m_owner_id;
+ Mutex &m_lock;
+ } ping(1, image1, owner_id, lock), pong(2, image2, owner_id, lock);
+
+ ping.create("ping");
+ pong.create("pong");
+ ping.join();
+ pong.join();
+
+ ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+ ASSERT_TRUE(lock_owner);
+
+ ASSERT_EQ(0, rbd_close(image2));
+
+ ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+ ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+ ASSERT_TRUE(lock_owner);
+
+ ASSERT_EQ(0, rbd_close(image1));
+ rados_ioctx_destroy(ioctx);
+}
struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy {
- virtual void lock_requested(bool force) {
+ virtual bool may_auto_request_lock() {
+ return false;
+ }
+
+ virtual int lock_requested(bool force) {
// TODO: interlock is being requested (e.g. local promotion)
// Wait for demote event from peer or abort replay on forced
// promotion.
+ return -EROFS;
}
};
)
)
+TRACEPOINT_EVENT(librbd, lock_acquire_enter,
+ TP_ARGS(
+ void*, imagectx,
+ int, lock_mode),
+ TP_FIELDS(
+ ctf_integer_hex(void*, imagectx, imagectx)
+ ctf_integer(int, lock_mode, lock_mode)
+ )
+)
+
+TRACEPOINT_EVENT(librbd, lock_acquire_exit,
+ TP_ARGS(
+ void*, imagectx,
+ int, retval),
+ TP_FIELDS(
+ ctf_integer_hex(void*, imagectx, imagectx)
+ ctf_integer(int, retval, retval)
+ )
+)
+
+TRACEPOINT_EVENT(librbd, lock_release_enter,
+ TP_ARGS(
+ void*, imagectx),
+ TP_FIELDS(
+ ctf_integer_hex(void*, imagectx, imagectx)
+ )
+)
+
+TRACEPOINT_EVENT(librbd, lock_release_exit,
+ TP_ARGS(
+ void*, imagectx,
+ int, retval),
+ TP_FIELDS(
+ ctf_integer_hex(void*, imagectx, imagectx)
+ ctf_integer(int, retval, retval)
+ )
+)
+
TRACEPOINT_EVENT(librbd, stat_enter,
TP_ARGS(
void*, imagectx,