git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: introduce RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT 66979/head
author Ilya Dryomov <idryomov@gmail.com>
Tue, 23 Dec 2025 13:27:18 +0000 (14:27 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
Tue, 20 Jan 2026 00:13:21 +0000 (01:13 +0100)
The existing StandardPolicy, which is exposed as the
RBD_LOCK_MODE_EXCLUSIVE argument to rbd_lock_acquire(), disables
automatic exclusive lock transitions with "permanent" semantics: any
request to release the lock causes the peer to error out immediately.
Such a lock owner can perform maintenance operations that are proxied
from other peers, but any write-like I/O issued by other peers will
fail with EROFS.

This isn't suitable for use cases where one of the peers wants to
manage exclusive lock manually (i.e. rbd_lock_acquire() is used) but
the lock is acquired only for very short periods of time.  The rest of
the time the lock is expected to be held by other peers that stay in
the default "auto" mode (AutomaticPolicy) and run as usual, completely
unconcerned with each other or the manual-mode peer.  However, these
peers become acutely aware of the manual-mode peer because when it
grabs the lock with RBD_LOCK_MODE_EXCLUSIVE, their I/O gets disrupted:
higher layers translate EROFS into generic EIO, filesystems shut down,
etc.

Add a new TransientPolicy exposed as RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
to allow disabling automatic exclusive lock transitions with semantics
that would cause the other peers to block waiting for the lock to be
released by the manual-mode peer.  This is intended to be a low-level
interface -- no attempt is made to safeguard against potential misuse
that could cause e.g. indefinite blocking.

It's possible to switch between RBD_LOCK_MODE_EXCLUSIVE and
RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT modes of operation both while the
lock is held and after it's released.

Fixes: https://tracker.ceph.com/issues/73824
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
13 files changed:
src/include/rbd/librbd.h
src/librbd/CMakeLists.txt
src/librbd/exclusive_lock/AutomaticPolicy.h
src/librbd/exclusive_lock/StandardPolicy.cc
src/librbd/exclusive_lock/StandardPolicy.h
src/librbd/exclusive_lock/TransientPolicy.cc [new file with mode: 0644]
src/librbd/exclusive_lock/TransientPolicy.h [new file with mode: 0644]
src/librbd/internal.cc
src/pybind/rbd/c_rbd.pxd
src/pybind/rbd/mock_rbd.pxi
src/pybind/rbd/rbd.pyx
src/test/librbd/test_librbd.cc
src/test/pybind/test_rbd.py

index 58fba47f0da54b184af6c2d338d2cec9f301ef0a..e19d25fd195cfcaec6d3d5c04c7f3e46d2575cf9 100644 (file)
@@ -311,6 +311,7 @@ typedef struct {
 typedef enum {
   RBD_LOCK_MODE_EXCLUSIVE = 0,
   RBD_LOCK_MODE_SHARED = 1,
+  RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT = 2
 } rbd_lock_mode_t;
 
 CEPH_RBD_API void rbd_version(int *major, int *minor, int *extra);
index 7c7d1c9bf1b7b84b9b4be385f8c07d19b2709d3e..cde9fa11a100b7e816568cbe8d7a6d180bc3c354 100644 (file)
@@ -72,6 +72,7 @@ set(librbd_internal_srcs
   deep_copy/SnapshotCreateRequest.cc
   deep_copy/Utils.cc
   exclusive_lock/AutomaticPolicy.cc
+  exclusive_lock/TransientPolicy.cc
   exclusive_lock/ImageDispatch.cc
   exclusive_lock/PreAcquireRequest.cc
   exclusive_lock/PostAcquireRequest.cc
index 012e967801c67af9d004dc3173c7d3ace180081e..3afdb43a9257928e973e9f52c276773b182d2d65 100644 (file)
@@ -25,7 +25,6 @@ public:
 
 private:
   ImageCtx *m_image_ctx;
-
 };
 
 } // namespace exclusive_lock
index 49f1c9955bfcf78a552093b52a3fab6873d6c788..567e3dba1927652862f65baedf4fca0d1534e5e5 100644 (file)
@@ -20,6 +20,8 @@ int StandardPolicy<I>::lock_requested(bool force) {
   ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
                              << dendl;
 
+  // refuse to release the lock (ignore forced requests) -- the peer
+  // will error out immediately
   return -EROFS;
 }
 
index e19944f4cb10587edca992f8d6f07eee0b16481c..40cd81dc7c30530a7998c2b6952955dd3cb8531a 100644 (file)
@@ -26,7 +26,6 @@ public:
 
 private:
   ImageCtxT* m_image_ctx;
-
 };
 
 } // namespace exclusive_lock
diff --git a/src/librbd/exclusive_lock/TransientPolicy.cc b/src/librbd/exclusive_lock/TransientPolicy.cc
new file mode 100644 (file)
index 0000000..7bd78fd
--- /dev/null
@@ -0,0 +1,31 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+#include "librbd/exclusive_lock/TransientPolicy.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ExclusiveLock.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock::TransientPolicy "
+
+namespace librbd {
+namespace exclusive_lock {
+
+template <typename I>
+int TransientPolicy<I>::lock_requested(bool force) {
+  ceph_assert(ceph_mutex_is_locked(m_image_ctx->owner_lock));
+  ceph_assert(m_image_ctx->exclusive_lock != nullptr);
+
+  ldout(m_image_ctx->cct, 20) << this << " " << __func__ << ": force=" << force
+                             << dendl;
+
+  // it's expected that the lock will be released shortly -- the peer
+  // will block waiting for that to happen
+  return 0;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::TransientPolicy<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/TransientPolicy.h b/src/librbd/exclusive_lock/TransientPolicy.h
new file mode 100644 (file)
index 0000000..4ca7058
--- /dev/null
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 sts=2 expandtab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
+
+#include "librbd/exclusive_lock/Policy.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class TransientPolicy : public Policy {
+public:
+  TransientPolicy(ImageCtxT* image_ctx) : m_image_ctx(image_ctx) {
+  }
+
+  bool may_auto_request_lock() override {
+    return false;
+  }
+
+  int lock_requested(bool force) override;
+
+private:
+  ImageCtxT* m_image_ctx;
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::TransientPolicy<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_TRANSIENT_POLICY_H
index 63a40df68261d3963dccdf149c3cc30cd7d944db..d61ef5d45b05eeba7c146b1f2b2b5a415ca91b1f 100644 (file)
@@ -40,6 +40,7 @@
 #include "librbd/api/Io.h"
 #include "librbd/cache/Utils.h"
 #include "librbd/exclusive_lock/StandardPolicy.h"
+#include "librbd/exclusive_lock/TransientPolicy.h"
 #include "librbd/deep_copy/MetadataCopyRequest.h"
 #include "librbd/image/CloneRequest.h"
 #include "librbd/image/CreateRequest.h"
@@ -959,6 +960,9 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
       if (lock_mode == RBD_LOCK_MODE_EXCLUSIVE) {
        ictx->set_exclusive_lock_policy(
          new exclusive_lock::StandardPolicy(ictx));
+      } else if (lock_mode == RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT) {
+        ictx->set_exclusive_lock_policy(
+          new exclusive_lock::TransientPolicy(ictx));
       } else {
         return -EOPNOTSUPP;
       }
index a5a61087c9e2e7deb0cf1ae940f93c6efa0ad81b..ae42d0bf19ac6f92f177e2a47b8ab5b3e28cd56f 100644 (file)
@@ -197,6 +197,7 @@ cdef extern from "rbd/librbd.h" nogil:
     ctypedef enum rbd_lock_mode_t:
         _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE"
         _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED"
+        _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT "RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT"
 
     ctypedef enum rbd_trash_image_source_t:
         _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER",
index 2f2871b738db4becaa69eeace590cc092cf4140f..41cb27cde78c4712863349a769423c3f8a45f9a6 100644 (file)
@@ -201,6 +201,7 @@ cdef nogil:
     ctypedef enum rbd_lock_mode_t:
         _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE"
         _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED"
+        _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT "RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT"
 
     ctypedef enum rbd_trash_image_source_t:
         _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER",
index 2185f62e2f179e226b038e19fc4c9f34e0574087..a00fc22d6736fc59816829fa334177e13ca6f5cf 100644 (file)
@@ -110,6 +110,7 @@ MIRROR_IMAGE_STATUS_STATE_STOPPED = _MIRROR_IMAGE_STATUS_STATE_STOPPED
 
 RBD_LOCK_MODE_EXCLUSIVE = _RBD_LOCK_MODE_EXCLUSIVE
 RBD_LOCK_MODE_SHARED = _RBD_LOCK_MODE_SHARED
+RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT = _RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT
 
 RBD_IMAGE_OPTION_FORMAT = _RBD_IMAGE_OPTION_FORMAT
 RBD_IMAGE_OPTION_FEATURES = _RBD_IMAGE_OPTION_FEATURES
index 67d94d0a463445225eea9fdb7c25154ee5fdce5e..6f142b9115762aa8691513c58f04558fdfc64743 100644 (file)
@@ -11230,6 +11230,83 @@ TEST_F(TestLibRBD, FlushCacheWithCopyupOnExternalSnapshot) {
   read_comp->release();
 }
 
+static void test_write_exclusive_lock(rbd_image_t image1, rbd_image_t image2,
+                                      char* buf, size_t buf_len) {
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(buf_len, rbd_write(image1, 0, buf_len, buf));
+
+  rbd_completion_t write_comp;
+  rbd_aio_create_completion(NULL, NULL, &write_comp);
+  ASSERT_EQ(0, rbd_aio_write(image2, 0, buf_len, buf, write_comp));
+
+  rbd_completion_t flush_comp;
+  rbd_aio_create_completion(NULL, NULL, &flush_comp);
+  ASSERT_EQ(0, rbd_aio_flush(image2, flush_comp));
+
+  for (int i = 0; i < 10 && !rbd_aio_is_complete(write_comp); i++) {
+    usleep(5 * 1000);
+  }
+  ASSERT_TRUE(rbd_aio_is_complete(write_comp));
+  ASSERT_EQ(-EROFS, rbd_aio_get_return_value(write_comp));
+  rbd_aio_release(write_comp);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(flush_comp));
+  ASSERT_EQ(-EROFS, rbd_aio_get_return_value(flush_comp));
+  rbd_aio_release(flush_comp);
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(buf_len, rbd_write(image2, 0, buf_len, buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+}
+
+static void test_write_exclusive_lock_transient(rbd_image_t image1,
+                                                rbd_image_t image2,
+                                                char* buf, size_t buf_len) {
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(buf_len, rbd_write(image1, 0, buf_len, buf));
+
+  rbd_completion_t write_comp;
+  rbd_aio_create_completion(NULL, NULL, &write_comp);
+  ASSERT_EQ(0, rbd_aio_write(image2, 0, buf_len, buf, write_comp));
+
+  rbd_completion_t flush_comp;
+  rbd_aio_create_completion(NULL, NULL, &flush_comp);
+  ASSERT_EQ(0, rbd_aio_flush(image2, flush_comp));
+
+  for (int i = 0; i < 10 && !rbd_aio_is_complete(write_comp); i++) {
+    usleep(500 * 1000);
+  }
+  ASSERT_FALSE(rbd_aio_is_complete(write_comp));
+  ASSERT_FALSE(rbd_aio_is_complete(flush_comp));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(write_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(write_comp));
+  rbd_aio_release(write_comp);
+
+  ASSERT_EQ(0, rbd_aio_wait_for_complete(flush_comp));
+  ASSERT_EQ(0, rbd_aio_get_return_value(flush_comp));
+  rbd_aio_release(flush_comp);
+
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+}
+
 TEST_F(TestLibRBD, ExclusiveLock)
 {
   REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
@@ -11252,6 +11329,8 @@ TEST_F(TestLibRBD, ExclusiveLock)
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
   ASSERT_TRUE(lock_owner);
 
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+
   rbd_lock_mode_t lock_mode;
   char *lock_owners[1];
   size_t max_lock_owners = 0;
@@ -11267,11 +11346,12 @@ TEST_F(TestLibRBD, ExclusiveLock)
 
   rbd_image_t image2;
   ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
-
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_FALSE(lock_owner);
 
   ASSERT_EQ(-EOPNOTSUPP, rbd_lock_break(image1, RBD_LOCK_MODE_SHARED, ""));
+  ASSERT_EQ(-EOPNOTSUPP,
+            rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT, ""));
   ASSERT_EQ(-EBUSY, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
                                    "not the owner"));
 
@@ -11283,17 +11363,42 @@ TEST_F(TestLibRBD, ExclusiveLock)
                                     lock_owners[0]));
   rbd_lock_get_owners_cleanup(lock_owners, max_lock_owners);
 
+  // lock isn't held by anyone, image2 acquires automatically
   ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
   ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // EXCLUSIVE_TRANSIENT -> EXCLUSIVE without unlocking
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+
+  // lock is held by image2
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
 
   ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_TRUE(lock_owner);
 
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
   ASSERT_EQ(0, rbd_lock_release(image2));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
   ASSERT_FALSE(lock_owner);
 
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
   ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
   ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
   ASSERT_TRUE(lock_owner);
@@ -11373,6 +11478,171 @@ TEST_F(TestLibRBD, ExclusiveLock)
   rados_ioctx_destroy(ioctx);
 }
 
+TEST_F(TestLibRBD, ExclusiveLockTransient)
+{
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  static char buf[10];
+
+  rados_ioctx_t ioctx;
+  rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  int order = 0;
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+  rbd_image_t image1;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image1, NULL));
+
+  int lock_owner;
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+
+  rbd_lock_mode_t lock_mode;
+  char *lock_owners[1];
+  size_t max_lock_owners = 0;
+  ASSERT_EQ(-ERANGE, rbd_lock_get_owners(image1, &lock_mode, lock_owners,
+                                         &max_lock_owners));
+  ASSERT_EQ(1U, max_lock_owners);
+
+  ASSERT_EQ(0, rbd_lock_get_owners(image1, &lock_mode, lock_owners,
+                                   &max_lock_owners));
+  ASSERT_EQ(RBD_LOCK_MODE_EXCLUSIVE, lock_mode);
+  ASSERT_STRNE("", lock_owners[0]);
+  ASSERT_EQ(1U, max_lock_owners);
+
+  rbd_image_t image2;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(-EOPNOTSUPP, rbd_lock_break(image1, RBD_LOCK_MODE_SHARED, ""));
+  ASSERT_EQ(-EOPNOTSUPP,
+            rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT, ""));
+  ASSERT_EQ(-EBUSY, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
+                                   "not the owner"));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(-ENOENT, rbd_lock_break(image1, RBD_LOCK_MODE_EXCLUSIVE,
+                                    lock_owners[0]));
+  rbd_lock_get_owners_cleanup(lock_owners, max_lock_owners);
+
+  // lock isn't held by anyone, image2 acquires automatically
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+  test_write_exclusive_lock(image1, image2, buf, sizeof(buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  // EXCLUSIVE -> EXCLUSIVE_TRANSIENT without unlocking
+  test_write_exclusive_lock_transient(image1, image2, buf, sizeof(buf));
+
+  // lock is held by image2
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_release(image2));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  ASSERT_EQ(-EROFS, rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ((ssize_t)sizeof(buf), rbd_write(image1, 0, sizeof(buf), buf));
+  ASSERT_EQ(-EROFS, rbd_write(image2, 0, sizeof(buf), buf));
+
+  ASSERT_EQ(0, rbd_lock_release(image1));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_FALSE(lock_owner);
+
+  int owner_id = -1;
+  std::mutex lock;
+  const auto pingpong = [&](int m_id, rbd_image_t &m_image) {
+      for (int i = 0; i < 10; i++) {
+        {
+          std::lock_guard<std::mutex> locker(lock);
+          if (owner_id == m_id) {
+            std::cout << m_id << ": releasing exclusive lock" << std::endl;
+            EXPECT_EQ(0, rbd_lock_release(m_image));
+            int lock_owner;
+            EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+            EXPECT_FALSE(lock_owner);
+            owner_id = -1;
+            std::cout << m_id << ": exclusive lock released" << std::endl;
+            continue;
+          }
+        }
+
+        std::cout << m_id << ": acquiring exclusive lock" << std::endl;
+        EXPECT_EQ(0, rbd_lock_acquire(m_image,
+                                      RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+
+        int lock_owner;
+        EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+        EXPECT_TRUE(lock_owner);
+        std::cout << m_id << ": exclusive lock acquired" << std::endl;
+        {
+          std::lock_guard<std::mutex> locker(lock);
+          owner_id = m_id;
+        }
+        usleep(rand() % 50000);
+      }
+
+      std::lock_guard<std::mutex> locker(lock);
+      if (owner_id == m_id) {
+        EXPECT_EQ(0, rbd_lock_release(m_image));
+        int lock_owner;
+        EXPECT_EQ(0, rbd_is_exclusive_lock_owner(m_image, &lock_owner));
+        EXPECT_FALSE(lock_owner);
+        owner_id = -1;
+      }
+  };
+  thread ping(bind(pingpong, 1, ref(image1)));
+  thread pong(bind(pingpong, 2, ref(image2)));
+
+  ping.join();
+  pong.join();
+
+  ASSERT_EQ(0, rbd_lock_acquire(image2, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image2, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_close(image2));
+
+  ASSERT_EQ(0, rbd_lock_acquire(image1, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT));
+  ASSERT_EQ(0, rbd_is_exclusive_lock_owner(image1, &lock_owner));
+  ASSERT_TRUE(lock_owner);
+
+  ASSERT_EQ(0, rbd_close(image1));
+  rados_ioctx_destroy(ioctx);
+}
+
 TEST_F(TestLibRBD, BreakLock)
 {
   REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
index 1fbb5a49ca815e978df527e806f3d6f3e895812b..f1d4f10e391be07ce4b417c79eed0d6824fff791 100644 (file)
@@ -33,7 +33,8 @@ from rbd import (RBD, Group, Image, ImageNotFound, InvalidArgument, ImageExists,
                  RBD_MIRROR_IMAGE_ENABLED, RBD_MIRROR_IMAGE_DISABLED,
                  MIRROR_IMAGE_STATUS_STATE_UNKNOWN,
                  RBD_MIRROR_IMAGE_MODE_JOURNAL, RBD_MIRROR_IMAGE_MODE_SNAPSHOT,
-                 RBD_LOCK_MODE_EXCLUSIVE, RBD_OPERATION_FEATURE_GROUP,
+                 RBD_LOCK_MODE_EXCLUSIVE, RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT,
+                 RBD_OPERATION_FEATURE_GROUP,
                  RBD_OPERATION_FEATURE_CLONE_CHILD,
                  RBD_SNAP_NAMESPACE_TYPE_USER,
                  RBD_SNAP_NAMESPACE_TYPE_GROUP,
@@ -2444,10 +2445,13 @@ class TestExclusiveLock(object):
             for offset in [0, IMG_SIZE // 2]:
                 read = image2.read(offset, 256)
                 eq(data, read)
+
     def test_acquire_release_lock(self):
         with Image(ioctx, image_name) as image:
             image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE)
             image.lock_release()
+            image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE_TRANSIENT)
+            image.lock_release()
 
     def test_break_lock(self):
         blocklist_rados = Rados(conffile='')