]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd: introduce RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
author Ilya Dryomov <idryomov@gmail.com>
Tue, 23 Dec 2025 13:27:18 +0000 (14:27 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
Thu, 12 Feb 2026 17:04:09 +0000 (18:04 +0100)
The existing StandardPolicy, which is exposed as the RBD_LOCK_MODE_EXCLUSIVE
argument to rbd_lock_acquire(), disables automatic exclusive lock
transitions with "permanent" semantics: any request to release the lock
causes the peer to error out immediately.  Such a lock owner can
perform maintenance operations that are proxied from other peers, but
any write-like I/O issued by other peers will fail with EROFS.

This isn't suitable for use cases where one of the peers wants to
manage exclusive lock manually (i.e. rbd_lock_acquire() is used) but
the lock is acquired only for very short periods of time.  The rest of
the time the lock is expected to be held by other peers that stay in
the default "auto" mode (AutomaticPolicy) and run as usual, completely
unconcerned with each other or the manual-mode peer.  However, these
peers become acutely aware of the manual-mode peer because when it grabs
the lock with RBD_LOCK_MODE_EXCLUSIVE their I/O gets disrupted: higher
layers translate EROFS into generic EIO, filesystems shut down, etc.

Add a new TransientPolicy exposed as RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
to allow disabling automatic exclusive lock transitions with semantics
that would cause the other peers to block waiting for the lock to be
released by the manual-mode peer.  This is intended to be a low-level
interface -- no attempt is made to safeguard against potential misuse
that could cause e.g. indefinite blocking.

It's possible to switch between RBD_LOCK_MODE_EXCLUSIVE and
RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT modes of operation both while the
lock is held and after it's released.

Fixes: https://tracker.ceph.com/issues/73824
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 8740544c51781211b82ac45d4bc93b9eb9623e76)

13 files changed:
src/include/rbd/librbd.h
src/librbd/CMakeLists.txt
src/librbd/exclusive_lock/AutomaticPolicy.h
src/librbd/exclusive_lock/StandardPolicy.cc
src/librbd/exclusive_lock/StandardPolicy.h
src/librbd/exclusive_lock/TransientPolicy.cc [new file with mode: 0644]
src/librbd/exclusive_lock/TransientPolicy.h [new file with mode: 0644]
src/librbd/internal.cc
src/pybind/rbd/c_rbd.pxd
src/pybind/rbd/mock_rbd.pxi
src/pybind/rbd/rbd.pyx
src/test/librbd/test_librbd.cc
src/test/pybind/test_rbd.py

index b347b03df9e280bfec111facfd6ba7bc1ba055dc..982489fe3dbd85aa16de6ed7fb176d5255a8c05e 100644 (file)
@@ -310,6 +310,7 @@ typedef struct {
 typedef enum {
   RBD_LOCK_MODE_EXCLUSIVE = 0,
   RBD_LOCK_MODE_SHARED = 1,
+  RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT = 2
 } rbd_lock_mode_t;
 
 CEPH_RBD_API void rbd_version(int *major, int *minor, int *extra);
index 7c7d1c9bf1b7b84b9b4be385f8c07d19b2709d3e..cde9fa11a100b7e816568cbe8d7a6d180bc3c354 100644 (file)
@@ -72,6 +72,7 @@ set(librbd_internal_srcs
   deep_copy/SnapshotCreateRequest.cc
   deep_copy/Utils.cc
   exclusive_lock/AutomaticPolicy.cc
+  exclusive_lock/TransientPolicy.cc
   exclusive_lock/ImageDispatch.cc
   exclusive_lock/PreAcquireRequest.cc
   exclusive_lock/PostAcquireRequest.cc
index 12ba9b6c4337082f273407d9b2ee4bf098e8db77..2929fb9d5c139c62649b6a613c7d324bcc1da4d7 100644 (file)
@@ -25,7 +25,6 @@ public:
 
 private:
   ImageCtx *m_image_ctx;
-
 };
 
 } // namespace exclusive_lock
index 519e9618ecc37aa6806a08876ce567b1619da44e..4dc18e4ede83426db5140a33b3b3b84221aad4d3 100644 (file)
@@ -20,6 +20,8 @@ int StandardPolicy<I>::lock_requested(bool force) {
   ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
                              << dendl;
 
+  // refuse to release the lock (ignore forced requests) -- the peer
+  // will error out immediately
   return -EROFS;
 }
 
index dd4e19050ce46727297f4a2f2aad0b3c95bd797d..989c3e8eff0750181a636889acd1a4ee258b830c 100644 (file)
@@ -26,7 +26,6 @@ public:
 
 private:
   ImageCtxT* m_image_ctx;
-
 };
 
 } // namespace exclusive_lock
diff --git a/src/librbd/exclusive_lock/TransientPolicy.cc b/src/librbd/exclusive_lock/TransientPolicy.cc
new file mode 100644 (file)
index 0000000..7bd78fd
--- /dev/null
@@ -0,0 +1,31 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+#include "librbd/exclusive_lock/TransientPolicy.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ExclusiveLock.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::TransientPolicy "
+
+namespace librbd {
+namespace exclusive_lock {
+
+template <typename I>
+int TransientPolicy<I>::lock_requested(bool force) {
+  ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+  ceph_assert(m_image_ctx->exclusive_lock != nullptr);
+
+  ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+                             << dendl;
+
+  // it's expected that the lock will be released shortly -- the peer
+  // will block waiting for that to happen
+  return 0;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::TransientPolicy<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/TransientPolicy.h b/src/librbd/exclusive_lock/TransientPolicy.h
new file mode 100644 (file)
index 0000000..4ca7058
--- /dev/null
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
+
+#include "librbd/exclusive_lock/Policy.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class TransientPolicy : public Policy {
+public:
+  TransientPolicy(ImageCtxT* image_ctx) : m_image_ctx(image_ctx) {
+  }
+
+  bool may_auto_request_lock() override {
+    return false;
+  }
+
+  int lock_requested(bool force) override;
+
+private:
+  ImageCtxT* m_image_ctx;
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::TransientPolicy<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
index 9a2f75e658440ea4ddbd171a2efcfda615866892..a4fa5995e584af4e8c5b6540ecabd692d295158b 100644 (file)
@@ -39,6 +39,7 @@
 #include "librbd/api/Io.h"
 #include "librbd/cache/Utils.h"
 #include "librbd/exclusive_lock/StandardPolicy.h"
+#include "librbd/exclusive_lock/TransientPolicy.h"
 #include "librbd/deep_copy/MetadataCopyRequest.h"
 #include "librbd/image/CloneRequest.h"
 #include "librbd/image/CreateRequest.h"
@@ -958,6 +959,9 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
       if (lock_mode == RBD_LOCK_MODE_EXCLUSIVE) {
        ictx->set_exclusive_lock_policy(
          new exclusive_lock::StandardPolicy(ictx));
+      } else if (lock_mode == RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT) {
+        ictx->set_exclusive_lock_policy(
+          new exclusive_lock::TransientPolicy(ictx));
       } else {
         return -EOPNOTSUPP;
       }
index a5a61087c9e2e7deb0cf1ae940f93c6efa0ad81b..ae42d0bf19ac6f92f177e2a47b8ab5b3e28cd56f 100644 (file)
@@ -197,6 +197,7 @@ cdef extern from "rbd/librbd.h" nogil:
     ctypedef enum rbd_lock_mode_t:
         _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE"
         _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED"
+        _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT "RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT"
 
     ctypedef enum rbd_trash_image_source_t:
         _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER",
index 2f2871b738db4becaa69eeace590cc092cf4140f..41cb27cde78c4712863349a769423c3f8a45f9a6 100644 (file)
@@ -201,6 +201,7 @@ cdef nogil:
     ctypedef enum rbd_lock_mode_t:
         _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE"
         _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED"
+        _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT "RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT"
 
     ctypedef enum rbd_trash_image_source_t:
         _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER",
index 2185f62e2f179e226b038e19fc4c9f34e0574087..a00fc22d6736fc59816829fa334177e13ca6f5cf 100644 (file)
@@ -110,6 +110,7 @@ MIRROR_IMAGE_STATUS_STATE_STOPPED = _MIRROR_IMAGE_STATUS_STATE_STOPPED
 
 RBD_LOCK_MODE_EXCLUSIVE = _RBD_LOCK_MODE_EXCLUSIVE
 RBD_LOCK_MODE_SHARED = _RBD_LOCK_MODE_SHARED
+RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT = _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
 
 RBD_IMAGE_OPTION_FORMAT = _RBD_IMAGE_OPTION_FORMAT
 RBD_IMAGE_OPTION_FEATURES = _RBD_IMAGE_OPTION_FEATURES
index a0c2c9ef6d14f51fb5b854b9873c6e440bd6ae10..bdee8fd899177a1e8918ca821f8c977dbebbc2a9 100644 (file)
@@ -11227,6 +11227,83 @@ TEST_F(TestLibRBD, FlushCacheWithCopyupOnExternalSnapshot) {
   read_comp->release();
 }
 
+static void test_write_exclusive_lock(rbd_image_t image1, rbd_image_t image2,
+                                      char* buf, size_t buf_len) {
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(buf_len, rbd_write(image1, 0, buf_len, buf));
+
+  rbd_completion_t write_comp;
+  rbd_aio_create_completion(NULL, NULL, &write_comp);
+  ASSERT_EQ(0, rbd_aio_write(image2, 0, buf_len, buf, write_comp));
+
+  rbd_completion_t flush_comp;
+  rbd_aio_create_completion(NULL, NULL, &flush_comp);
+  ASSERT_EQ(0, rbd_aio_flush(image2, flush_comp));
+
+  for (int i = 0; i < 10 && !rbd_aio_is_complete(write_comp); i++) {
+    usleep(5 * 1000);
+  }
+  ASSERT_TRUE(rbd_aio_is_complete(write_comp));
+  ASSERT_EQ(-EROFS, rbd_aio_get_return_value(write_comp));
+  rbd_aio_release(write_comp);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(flush_comp));
+  ASSERT_EQ(-EROFS, rbd_aio_get_return_value(flush_comp));
+  rbd_aio_release(flush_comp);
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(buf_len, rbd_write(image2, 0, buf_len, buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+}
+
+static void test_write_exclusive_lock_transient(rbd_image_t image1,
+                                                rbd_image_t image2,
+                                                char* buf, size_t buf_len) {
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(buf_len, rbd_write(image1, 0, buf_len, buf));
+
+  rbd_completion_t write_comp;
+  rbd_aio_create_completion(NULL, NULL, &write_comp);
+  ASSERT_EQ(0, rbd_aio_write(image2, 0, buf_len, buf, write_comp));
+
+  rbd_completion_t flush_comp;
+  rbd_aio_create_completion(NULL, NULL, &flush_comp);
+  ASSERT_EQ(0, rbd_aio_flush(image2, flush_comp));
+
+  for (int i = 0; i < 10 && !rbd_aio_is_complete(write_comp); i++) {
+    usleep(500 * 1000);
+  }
+  ASSERT_FALSE(rbd_aio_is_complete(write_comp));
+  ASSERT_FALSE(rbd_aio_is_complete(flush_comp));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(write_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(write_comp));
+  rbd_aio_release(write_comp);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(flush_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(flush_comp));
+  rbd_aio_release(flush_comp);
+
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+}
+
 TEST_F(TestLibRBD, ExclusiveLock)
 {
   REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
@@ -11249,6 +11326,8 @@ TEST_F(TestLibRBD, ExclusiveLock)
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
   ASSERT_TRUE(lock_owner);
 
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+
   rbd_lock_mode_t lock_mode;
   char *lock_owners[1];
   size_t max_lock_owners = 0;
@@ -11264,11 +11343,12 @@ TEST_F(TestLibRBD, ExclusiveLock)
 
   rbd_image_t image2;
   ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
-
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_FALSE(lock_owner);
 
   ASSERT_EQ(-EOPNOTSUPP, rbd_lock_break(image1, RBD_LOCK_MODE_SHARED, ""));
+  ASSERT_EQ(-EOPNOTSUPP,
+            rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT, ""));
   ASSERT_EQ(-EBUSY, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
                                    "not the owner"));
 
@@ -11280,17 +11360,42 @@ TEST_F(TestLibRBD, ExclusiveLock)
                                     lock_owners[0]));
   rbd_lock_get_owners_cleanup(lock_owners, max_lock_owners);
 
+  // lock isn't held by anyone, image2 acquires automatically
   ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
   ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // EXCLUSIVE_TRANSIENT -> EXCLUSIVE without unlocking
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+
+  // lock is held by image2
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
 
   ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_TRUE(lock_owner);
 
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
   ASSERT_EQ(0, rbd_lock_release(image2));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_FALSE(lock_owner);
 
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
   ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
   ASSERT_TRUE(lock_owner);
@@ -11370,6 +11475,171 @@ TEST_F(TestLibRBD, ExclusiveLock)
   rados_ioctx_destroy(ioctx);
 }
 
+TEST_F(TestLibRBD, ExclusiveLockTransient)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  static char buf[10];
+
+  rados_ioctx_t ioctx;
+  rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  int order = 0;
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+  rbd_image_t image1;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image1, NULL));
+
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+
+  rbd_lock_mode_t lock_mode;
+  char *lock_owners[1];
+  size_t max_lock_owners = 0;
+  ASSERT_EQ(-ERANGE, rbd_lock_get_owners(image1, &lock_mode, lock_owners,
+                                         &max_lock_owners));
+  ASSERT_EQ(1U, max_lock_owners);
+
+  ASSERT_EQ(0, rbd_lock_get_owners(image1, &lock_mode, lock_owners,
+                                   &max_lock_owners));
+  ASSERT_EQ(RBD_LOCK_MODE_EXCLUSIVE, lock_mode);
+  ASSERT_STRNE("", lock_owners[0]);
+  ASSERT_EQ(1U, max_lock_owners);
+
+  rbd_image_t image2;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(-EOPNOTSUPP, rbd_lock_break(image1, RBD_LOCK_MODE_SHARED, ""));
+  ASSERT_EQ(-EOPNOTSUPP,
+            rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT, ""));
+  ASSERT_EQ(-EBUSY, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
+                                   "not the owner"));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(-ENOENT, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
+                                    lock_owners[0]));
+  rbd_lock_get_owners_cleanup(lock_owners, max_lock_owners);
+
+  // lock isn't held by anyone, image2 acquires automatically
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // EXCLUSIVE -> EXCLUSIVE_TRANSIENT without unlocking
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+
+  // lock is held by image2
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_release(image2));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  int owner_id = -1;
+  std::mutex lock;
+  const auto pingpong = [&](int m_id, rbd_image_t &m_image) {
+      for (int i = 0; i < 10; i++) {
+        {
+          std::lock_guard<std::mutex> locker(lock);
+          if (owner_id == m_id) {
+            std::cout << m_id << ": releasing exclusive lock" << std::endl;
+            EXPECT_EQ(0, rbd_lock_release(m_image));
+            int lock_owner;
+            EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+            EXPECT_FALSE(lock_owner);
+            owner_id = -1;
+            std::cout << m_id << ": exclusive lock released" << std::endl;
+            continue;
+          }
+        }
+
+        std::cout << m_id << ": acquiring exclusive lock" << std::endl;
+        EXPECT_EQ(0, rbd_lock_acquire(m_image,
+                                      RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+
+        int lock_owner;
+        EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+        EXPECT_TRUE(lock_owner);
+        std::cout << m_id << ": exclusive lock acquired" << std::endl;
+        {
+          std::lock_guard<std::mutex> locker(lock);
+          owner_id = m_id;
+        }
+        usleep(rand() % 50000);
+      }
+
+      std::lock_guard<std::mutex> locker(lock);
+      if (owner_id == m_id) {
+        EXPECT_EQ(0, rbd_lock_release(m_image));
+        int lock_owner;
+        EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+        EXPECT_FALSE(lock_owner);
+        owner_id = -1;
+      }
+  };
+  thread ping(bind(pingpong, 1, ref(image1)));
+  thread pong(bind(pingpong, 2, ref(image2)));
+
+  ping.join();
+  pong.join();
+
+  ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_close(image2));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_close(image1));
+  rados_ioctx_destroy(ioctx);
+}
+
 TEST_F(TestLibRBD, BreakLock)
 {
   SKIP_IF_CRIMSON();
index 9e3995687fbad8bae18559e44372abf73a8802e9..91e011c1e9f75687f74dfc7372117d0406821ee1 100644 (file)
@@ -33,7 +33,8 @@ from rbd import (RBD, Group, Image, ImageNotFound, InvalidArgument, ImageExists,
                  RBD_MIRROR_IMAGE_ENABLED, RBD_MIRROR_IMAGE_DISABLED,
                  MIRROR_IMAGE_STATUS_STATE_UNKNOWN,
                  RBD_MIRROR_IMAGE_MODE_JOURNAL, RBD_MIRROR_IMAGE_MODE_SNAPSHOT,
-                 RBD_LOCK_MODE_EXCLUSIVE, RBD_OPERATION_FEATURE_GROUP,
+                 RBD_LOCK_MODE_EXCLUSIVE, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT,
+                 RBD_OPERATION_FEATURE_GROUP,
                  RBD_OPERATION_FEATURE_CLONE_CHILD,
                  RBD_SNAP_NAMESPACE_TYPE_USER,
                  RBD_SNAP_NAMESPACE_TYPE_GROUP,
@@ -2445,10 +2446,13 @@ class TestExclusiveLock(object):
             for offset in [0, IMG_SIZE // 2]:
                 read = image2.read(offset, 256)
                 eq(data, read)
+
     def test_acquire_release_lock(self):
         with Image(ioctx, image_name) as image:
             image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE)
             image.lock_release()
+            image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT)
+            image.lock_release()
 
     @pytest.mark.skip_if_crimson
     def test_break_lock(self):