git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: introduce RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT 67278/head
author Ilya Dryomov <idryomov@gmail.com>
Tue, 23 Dec 2025 13:27:18 +0000 (14:27 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 9 Feb 2026 23:24:24 +0000 (00:24 +0100)
The existing StandardPolicy that is exposed as RBD_LOCK_MODE_EXCLUSIVE
argument to rbd_lock_acquire() disables automatic exclusive lock
transitions with "permanent" semantics: any request to release the lock
causes the peer to error out immediately.  Such a lock owner can
perform maintenance operations that are proxied from other peers, but
any write-like I/O issued by other peers will fail with EROFS.

This isn't suitable for use cases where one of the peers wants to
manage exclusive lock manually (i.e. rbd_lock_acquire() is used) but
the lock is acquired only for very short periods of time.  The rest of
the time the lock is expected to be held by other peers that stay in
the default "auto" mode (AutomaticPolicy) and run as usual, completely
unconcerned with each other or the manual-mode peer.  However, these
peers become acutely aware of the manual-mode peer because when it grabs
the lock with RBD_LOCK_MODE_EXCLUSIVE their I/O gets disrupted: higher
layers translate EROFS into generic EIO, filesystems shut down, etc.

Add a new TransientPolicy exposed as RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
to allow disabling automatic exclusive lock transitions with semantics
that would cause the other peers to block waiting for the lock to be
released by the manual-mode peer.  This is intended to be a low-level
interface -- no attempt to safeguard against potential misuse causing
e.g. indefinite blocking is made.

It's possible to switch between RBD_LOCK_MODE_EXCLUSIVE and
RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT modes of operation both while the
lock is held and after it's released.

Fixes: https://tracker.ceph.com/issues/73824
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 8740544c51781211b82ac45d4bc93b9eb9623e76)

13 files changed:
src/include/rbd/librbd.h
src/librbd/CMakeLists.txt
src/librbd/exclusive_lock/AutomaticPolicy.h
src/librbd/exclusive_lock/StandardPolicy.cc
src/librbd/exclusive_lock/StandardPolicy.h
src/librbd/exclusive_lock/TransientPolicy.cc [new file with mode: 0644]
src/librbd/exclusive_lock/TransientPolicy.h [new file with mode: 0644]
src/librbd/internal.cc
src/pybind/rbd/c_rbd.pxd
src/pybind/rbd/mock_rbd.pxi
src/pybind/rbd/rbd.pyx
src/test/librbd/test_librbd.cc
src/test/pybind/test_rbd.py

index 779f85c5c61f22866530c65abed4690fa016f324..aa2a4d00f62322c2975f4b257e331bcd9c4c26d1 100644 (file)
@@ -287,6 +287,7 @@ typedef struct {
 typedef enum {
+  /* manually managed lock with "permanent" semantics: requests from
+   * other peers to release the lock are refused and fail with EROFS */
   RBD_LOCK_MODE_EXCLUSIVE = 0,
   RBD_LOCK_MODE_SHARED = 1,
+  /* manually managed lock that is expected to be held only briefly:
+   * other peers block waiting for the lock instead of erroring out */
+  RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT = 2
 } rbd_lock_mode_t;
 
 CEPH_RBD_API void rbd_version(int *major, int *minor, int *extra);
index a5975bf350b6e085107d3ec43e45d51be6dffa27..e992d393d6c87907d99c91369a22a2c54aed60e9 100644 (file)
@@ -72,6 +72,7 @@ set(librbd_internal_srcs
   deep_copy/SnapshotCreateRequest.cc
   deep_copy/Utils.cc
   exclusive_lock/AutomaticPolicy.cc
+  exclusive_lock/TransientPolicy.cc
   exclusive_lock/ImageDispatch.cc
   exclusive_lock/PreAcquireRequest.cc
   exclusive_lock/PostAcquireRequest.cc
index 12ba9b6c4337082f273407d9b2ee4bf098e8db77..2929fb9d5c139c62649b6a613c7d324bcc1da4d7 100644 (file)
@@ -25,7 +25,6 @@ public:
 
 private:
   ImageCtx *m_image_ctx;
-
 };
 
 } // namespace exclusive_lock
index 519e9618ecc37aa6806a08876ce567b1619da44e..4dc18e4ede83426db5140a33b3b3b84221aad4d3 100644 (file)
@@ -20,6 +20,8 @@ int StandardPolicy<I>::lock_requested(bool force) {
   ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
                              << dendl;
 
+  // refuse to release the lock (ignore forced requests) -- the peer
+  // will error out immediately
   return -EROFS;
 }
 
index dd4e19050ce46727297f4a2f2aad0b3c95bd797d..989c3e8eff0750181a636889acd1a4ee258b830c 100644 (file)
@@ -26,7 +26,6 @@ public:
 
 private:
   ImageCtxT* m_image_ctx;
-
 };
 
 } // namespace exclusive_lock
diff --git a/src/librbd/exclusive_lock/TransientPolicy.cc b/src/librbd/exclusive_lock/TransientPolicy.cc
new file mode 100644 (file)
index 0000000..7bd78fd
--- /dev/null
@@ -0,0 +1,31 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+#include "librbd/exclusive_lock/TransientPolicy.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ExclusiveLock.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::TransientPolicy "
+
+namespace librbd {
+namespace exclusive_lock {
+
+// Policy hook for a request to give up the exclusive lock.
+//
+// Neither releases the lock nor rejects the request: 0 is returned
+// unconditionally and `force` is only logged.
+template <typename I>
+int TransientPolicy<I>::lock_requested(bool force) {
+  // the caller must hold owner_lock and the image must have an
+  // exclusive lock object
+  ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+  ceph_assert(m_image_ctx->exclusive_lock != nullptr);
+
+  ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+                             << dendl;
+
+  // it's expected that the lock will be released shortly -- the peer
+  // will block waiting for that to happen
+  return 0;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::TransientPolicy<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/TransientPolicy.h b/src/librbd/exclusive_lock/TransientPolicy.h
new file mode 100644 (file)
index 0000000..4ca7058
--- /dev/null
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
+
+#include "librbd/exclusive_lock/Policy.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+// Exclusive lock policy installed for RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT.
+//
+// The lock is managed manually and is expected to be held only for
+// short periods of time; see lock_requested() for how requests to
+// release it are answered.
+template <typename ImageCtxT = ImageCtx>
+class TransientPolicy : public Policy {
+public:
+  TransientPolicy(ImageCtxT* image_ctx) : m_image_ctx(image_ctx) {
+  }
+
+  // always false -- NOTE(review): presumably this is what disables
+  // automatic lock acquisition for this image; confirm against Policy.h
+  bool may_auto_request_lock() override {
+    return false;
+  }
+
+  // defined in TransientPolicy.cc
+  int lock_requested(bool force) override;
+
+private:
+  // image context passed to the constructor
+  ImageCtxT* m_image_ctx;
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::TransientPolicy<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
index 8d7bbe2e8b3b0dedc8c06bffa4d24e7a8bc27938..f5904e0b15e9a731a401af939bbb1496d582c804 100644 (file)
@@ -39,6 +39,7 @@
 #include "librbd/api/Io.h"
 #include "librbd/cache/Utils.h"
 #include "librbd/exclusive_lock/StandardPolicy.h"
+#include "librbd/exclusive_lock/TransientPolicy.h"
 #include "librbd/deep_copy/MetadataCopyRequest.h"
 #include "librbd/image/CloneRequest.h"
 #include "librbd/image/CreateRequest.h"
@@ -951,6 +952,9 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
       if (lock_mode == RBD_LOCK_MODE_EXCLUSIVE) {
        ictx->set_exclusive_lock_policy(
          new exclusive_lock::StandardPolicy(ictx));
+      } else if (lock_mode == RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT) {
+        ictx->set_exclusive_lock_policy(
+          new exclusive_lock::TransientPolicy(ictx));
       } else {
         return -EOPNOTSUPP;
       }
index f5d9096789a19701653f30e9cfeb933bebbc3017..256d97082be91365132f57ec84e4480772291794 100644 (file)
@@ -196,6 +196,7 @@ cdef extern from "rbd/librbd.h" nogil:
     ctypedef enum rbd_lock_mode_t:
         _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE"
         _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED"
+        _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT "RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT"
 
     ctypedef enum rbd_trash_image_source_t:
         _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER",
index 7a8faf750bd8ee7de3142ea2f4231500e69a2eff..9b17282085b149253a7de787b2d1788d7fae8d8c 100644 (file)
@@ -200,6 +200,7 @@ cdef nogil:
     ctypedef enum rbd_lock_mode_t:
         _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE"
         _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED"
+        _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT "RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT"
 
     ctypedef enum rbd_trash_image_source_t:
         _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER",
index f206e78ed1d01d19fd28c11f9901ac24390892ec..50426196cb8bf18aa9b3e962b52320b46820fdb3 100644 (file)
@@ -109,6 +109,7 @@ MIRROR_IMAGE_STATUS_STATE_STOPPED = _MIRROR_IMAGE_STATUS_STATE_STOPPED
 
 RBD_LOCK_MODE_EXCLUSIVE = _RBD_LOCK_MODE_EXCLUSIVE
 RBD_LOCK_MODE_SHARED = _RBD_LOCK_MODE_SHARED
+RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT = _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
 
 RBD_IMAGE_OPTION_FORMAT = _RBD_IMAGE_OPTION_FORMAT
 RBD_IMAGE_OPTION_FEATURES = _RBD_IMAGE_OPTION_FEATURES
index d9c0628caecddcccc37e86c95433b56c6c7c447c..9144c006a989704478083e55c94274121d21ccde 100644 (file)
@@ -11212,6 +11212,83 @@ TEST_F(TestLibRBD, FlushCacheWithCopyupOnExternalSnapshot) {
   read_comp->release();
 }
 
+// Checks RBD_LOCK_MODE_EXCLUSIVE behavior between two handles of the
+// same image: while image1 holds the lock, a write and a flush issued
+// on image2 must fail with -EROFS; after image1 releases the lock,
+// a write on image2 must succeed and make image2 the lock owner.
+//
+// buf/buf_len is the payload written at offset 0.  Note that a fatal
+// ASSERT_* failure only returns from this helper, not from the calling
+// test.
+static void test_write_exclusive_lock(rbd_image_t image1, rbd_image_t image2,
+                                      char* buf, size_t buf_len) {
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // the lock owner itself can write
+  ASSERT_EQ(buf_len, rbd_write(image1, 0, buf_len, buf));
+
+  // queue a write followed by a flush on the non-owner handle
+  rbd_completion_t write_comp;
+  rbd_aio_create_completion(NULL, NULL, &write_comp);
+  ASSERT_EQ(0, rbd_aio_write(image2, 0, buf_len, buf, write_comp));
+
+  rbd_completion_t flush_comp;
+  rbd_aio_create_completion(NULL, NULL, &flush_comp);
+  ASSERT_EQ(0, rbd_aio_flush(image2, flush_comp));
+
+  // poll for up to 10 x 5ms -- the write is expected to fail promptly
+  // instead of blocking until the lock is released
+  for (int i = 0; i < 10 && !rbd_aio_is_complete(write_comp); i++) {
+    usleep(5 * 1000);
+  }
+  ASSERT_TRUE(rbd_aio_is_complete(write_comp));
+  ASSERT_EQ(-EROFS, rbd_aio_get_return_value(write_comp));
+  rbd_aio_release(write_comp);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(flush_comp));
+  ASSERT_EQ(-EROFS, rbd_aio_get_return_value(flush_comp));
+  rbd_aio_release(flush_comp);
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  // with the lock released, image2's write goes through and image2
+  // ends up owning the lock
+  ASSERT_EQ(buf_len, rbd_write(image2, 0, buf_len, buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+}
+
+// Checks RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT behavior between two handles
+// of the same image: while image1 holds the lock, a write and a flush
+// issued on image2 must stay pending; once image1 releases the lock,
+// both must complete successfully and image2 must own the lock.
+//
+// buf/buf_len is the payload written at offset 0.  Note that a fatal
+// ASSERT_* failure only returns from this helper, not from the calling
+// test.
+static void test_write_exclusive_lock_transient(rbd_image_t image1,
+                                                rbd_image_t image2,
+                                                char* buf, size_t buf_len) {
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // the lock owner itself can write
+  ASSERT_EQ(buf_len, rbd_write(image1, 0, buf_len, buf));
+
+  // queue a write followed by a flush on the non-owner handle
+  rbd_completion_t write_comp;
+  rbd_aio_create_completion(NULL, NULL, &write_comp);
+  ASSERT_EQ(0, rbd_aio_write(image2, 0, buf_len, buf, write_comp));
+
+  rbd_completion_t flush_comp;
+  rbd_aio_create_completion(NULL, NULL, &flush_comp);
+  ASSERT_EQ(0, rbd_aio_flush(image2, flush_comp));
+
+  // poll for up to 10 x 500ms -- unlike in RBD_LOCK_MODE_EXCLUSIVE mode,
+  // neither request is expected to complete while image1 holds the lock
+  for (int i = 0; i < 10 && !rbd_aio_is_complete(write_comp); i++) {
+    usleep(500 * 1000);
+  }
+  ASSERT_FALSE(rbd_aio_is_complete(write_comp));
+  ASSERT_FALSE(rbd_aio_is_complete(flush_comp));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  // releasing the lock unblocks the pending requests
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(write_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(write_comp));
+  rbd_aio_release(write_comp);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(flush_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(flush_comp));
+  rbd_aio_release(flush_comp);
+
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+}
+
 TEST_F(TestLibRBD, ExclusiveLock)
 {
   REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
@@ -11234,6 +11311,8 @@ TEST_F(TestLibRBD, ExclusiveLock)
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
   ASSERT_TRUE(lock_owner);
 
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+
   rbd_lock_mode_t lock_mode;
   char *lock_owners[1];
   size_t max_lock_owners = 0;
@@ -11249,11 +11328,12 @@ TEST_F(TestLibRBD, ExclusiveLock)
 
   rbd_image_t image2;
   ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
-
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_FALSE(lock_owner);
 
   ASSERT_EQ(-EOPNOTSUPP, rbd_lock_break(image1, RBD_LOCK_MODE_SHARED, ""));
+  ASSERT_EQ(-EOPNOTSUPP,
+            rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT, ""));
   ASSERT_EQ(-EBUSY, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
                                    "not the owner"));
 
@@ -11265,17 +11345,42 @@ TEST_F(TestLibRBD, ExclusiveLock)
                                     lock_owners[0]));
   rbd_lock_get_owners_cleanup(lock_owners, max_lock_owners);
 
+  // lock isn't held by anyone, image2 acquires automatically
   ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
   ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // EXCLUSIVE_TRANSIENT -> EXCLUSIVE without unlocking
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+
+  // lock is held by image2
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
 
   ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_TRUE(lock_owner);
 
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
   ASSERT_EQ(0, rbd_lock_release(image2));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_FALSE(lock_owner);
 
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
   ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
   ASSERT_TRUE(lock_owner);
@@ -11355,6 +11460,171 @@ TEST_F(TestLibRBD, ExclusiveLock)
   rados_ioctx_destroy(ioctx);
 }
 
+// End-to-end test of RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT using two handles
+// (image1, image2) of the same image: lock ownership queries, lock
+// breaking, switching between EXCLUSIVE and EXCLUSIVE_TRANSIENT, and
+// two threads repeatedly acquiring and releasing the lock.
+TEST_F(TestLibRBD, ExclusiveLockTransient)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  // write payload (static storage, so zero-initialized)
+  static char buf[10];
+
+  rados_ioctx_t ioctx;
+  rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  int order = 0;
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+  rbd_image_t image1;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image1, NULL));
+
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+
+  // with room for zero owners the call fails with -ERANGE and reports
+  // the required count through max_lock_owners
+  rbd_lock_mode_t lock_mode;
+  char *lock_owners[1];
+  size_t max_lock_owners = 0;
+  ASSERT_EQ(-ERANGE, rbd_lock_get_owners(image1, &lock_mode, lock_owners,
+                                         &max_lock_owners));
+  ASSERT_EQ(1U, max_lock_owners);
+
+  // the lock is reported as RBD_LOCK_MODE_EXCLUSIVE even though it was
+  // acquired with RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
+  ASSERT_EQ(0, rbd_lock_get_owners(image1, &lock_mode, lock_owners,
+                                   &max_lock_owners));
+  ASSERT_EQ(RBD_LOCK_MODE_EXCLUSIVE, lock_mode);
+  ASSERT_STRNE("", lock_owners[0]);
+  ASSERT_EQ(1U, max_lock_owners);
+
+  rbd_image_t image2;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  // only RBD_LOCK_MODE_EXCLUSIVE is accepted by rbd_lock_break(), and
+  // the named owner has to match
+  ASSERT_EQ(-EOPNOTSUPP, rbd_lock_break(image1, RBD_LOCK_MODE_SHARED, ""));
+  ASSERT_EQ(-EOPNOTSUPP,
+            rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT, ""));
+  ASSERT_EQ(-EBUSY, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
+                                   "not the owner"));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  // the previously recorded owner no longer holds the lock
+  ASSERT_EQ(-ENOENT, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
+                                    lock_owners[0]));
+  rbd_lock_get_owners_cleanup(lock_owners, max_lock_owners);
+
+  // lock isn't held by anyone, image2 acquires automatically
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // switch modes after the lock has been released in between
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // EXCLUSIVE -> EXCLUSIVE_TRANSIENT without unlocking
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+
+  // lock is held by image2
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // image2 switches to managing the lock manually as well
+  ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_release(image2));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  // lock isn't held by anyone and both handles have used
+  // rbd_lock_acquire() -- writes on either one fail
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // image2's write fails right away instead of blocking --
+  // NOTE(review): presumably because image2 doesn't request the lock
+  // on its own after having used rbd_lock_acquire(); confirm
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  // owner_id is the id of the thread that last recorded a successful
+  // acquire (-1 if none); it is only accessed with "lock" held
+  int owner_id = -1;
+  std::mutex lock;
+  // Runs 10 iterations: if this thread is recorded as the owner, release
+  // the lock; otherwise acquire it in EXCLUSIVE_TRANSIENT mode and record
+  // ownership.  EXPECT_* is used so that a failure doesn't return from
+  // the lambda early.
+  const auto pingpong = [&](int m_id, rbd_image_t &m_image) {
+      for (int i = 0; i < 10; i++) {
+        {
+          std::lock_guard<std::mutex> locker(lock);
+          if (owner_id == m_id) {
+            std::cout << m_id << ": releasing exclusive lock" << std::endl;
+            EXPECT_EQ(0, rbd_lock_release(m_image));
+            int lock_owner;
+            EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+            EXPECT_FALSE(lock_owner);
+            owner_id = -1;
+            std::cout << m_id << ": exclusive lock released" << std::endl;
+            continue;
+          }
+        }
+
+        // acquire without holding "lock" so that the other thread
+        // remains free to release in the meantime
+        std::cout << m_id << ": acquiring exclusive lock" << std::endl;
+        EXPECT_EQ(0, rbd_lock_acquire(m_image,
+                                      RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+
+        int lock_owner;
+        EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+        EXPECT_TRUE(lock_owner);
+        std::cout << m_id << ": exclusive lock acquired" << std::endl;
+        {
+          std::lock_guard<std::mutex> locker(lock);
+          owner_id = m_id;
+        }
+        // hold the lock for a random time of up to 50ms
+        usleep(rand() % 50000);
+      }
+
+      // don't leave the lock held when the thread finishes
+      std::lock_guard<std::mutex> locker(lock);
+      if (owner_id == m_id) {
+        EXPECT_EQ(0, rbd_lock_release(m_image));
+        int lock_owner;
+        EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+        EXPECT_FALSE(lock_owner);
+        owner_id = -1;
+      }
+  };
+  thread ping(bind(pingpong, 1, ref(image1)));
+  thread pong(bind(pingpong, 2, ref(image2)));
+
+  ping.join();
+  pong.join();
+
+  // close each handle while it holds the lock
+  ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_close(image2));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_close(image1));
+  rados_ioctx_destroy(ioctx);
+}
+
 TEST_F(TestLibRBD, BreakLock)
 {
   SKIP_IF_CRIMSON();
index d62dce1a8dd77aa56cb2694f4031d6772fdd9059..3c6c32b3885bc01ffe452f5159795f3ca67a8347 100644 (file)
@@ -32,7 +32,8 @@ from rbd import (RBD, Group, Image, ImageNotFound, InvalidArgument, ImageExists,
                  RBD_MIRROR_MODE_POOL, RBD_MIRROR_IMAGE_ENABLED,
                  RBD_MIRROR_IMAGE_DISABLED, MIRROR_IMAGE_STATUS_STATE_UNKNOWN,
                  RBD_MIRROR_IMAGE_MODE_JOURNAL, RBD_MIRROR_IMAGE_MODE_SNAPSHOT,
-                 RBD_LOCK_MODE_EXCLUSIVE, RBD_OPERATION_FEATURE_GROUP,
+                 RBD_LOCK_MODE_EXCLUSIVE, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT,
+                 RBD_OPERATION_FEATURE_GROUP,
                  RBD_OPERATION_FEATURE_CLONE_CHILD,
                  RBD_SNAP_NAMESPACE_TYPE_USER,
                  RBD_SNAP_NAMESPACE_TYPE_GROUP,
@@ -2438,10 +2439,13 @@ class TestExclusiveLock(object):
             for offset in [0, IMG_SIZE // 2]:
                 read = image2.read(offset, 256)
                 eq(data, read)
+
     def test_acquire_release_lock(self):
+        # acquire and release the lock in each exclusive mode, back to
+        # back on the same image handle
         with Image(ioctx, image_name) as image:
             image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE)
             image.lock_release()
+            image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT)
+            image.lock_release()
 
     @pytest.mark.skip_if_crimson
     def test_break_lock(self):