]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson: use tri_mutex for guarding ObjectContext
authorKefu Chai <kchai@redhat.com>
Mon, 14 Sep 2020 07:23:17 +0000 (15:23 +0800)
committerKefu Chai <kchai@redhat.com>
Tue, 15 Sep 2020 08:48:27 +0000 (16:48 +0800)
before this change, a seastar::shared_mutex, a RWState and a
shared_promise are used for tracking the consumers of ObjectContext.
and all of the consumers are put into writers if the predicate function
evaluates to "false", and are awakened if the predicate function evaluates
to "true" afterwards in a polling loop waiting on the shared_promise,
which is in turn fulfilled once the last consumer of the given category
relinquishes the lock.

this approach has a couple of issues:

* it is heavyweight. seastar::shared_mutex already tracks each of
  the waiters' continuation using separate promise<>, and it does try
  to reschedule them once a given consumer releases the last lock.
  so it's like a design of a customized shared_mutex over a
  shared_mutex.
* it is complicated. 3 variables for tracking the different
  consumers of ObjectContext.

in this change,

* `tri_mutex` is introduced as a variant of the original
  `seastar::shared_mutex` to track two different shared users in
  addition to an exclusive user.
* replace `shared_mutex` with `tri_mutex` in `ObjectContext`, to
  simplify the design.
* move recovery_read_marker into `ObjectContext`. assuming all
  pending actions will be added as a waiter for the related
  object context before they acquire the lock.

Signed-off-by: Kefu Chai <kchai@redhat.com>
src/crimson/CMakeLists.txt
src/crimson/common/tri_mutex.cc [new file with mode: 0644]
src/crimson/common/tri_mutex.h [new file with mode: 0644]
src/crimson/osd/object_context.h

index 35ff74498a5f658d11f11eaee26d38543933e8ed..d346e942f0869adf73da9dd7b4b35f8efc94eff7 100644 (file)
@@ -21,7 +21,8 @@ set(crimson_common_srcs
   common/formatter.cc
   common/perf_counters_collection.cc
   common/log.cc
-  common/throttle.cc)
+  common/throttle.cc
+  common/tri_mutex.cc)
 
 # the specialized version of ceph-common, where
 #  - the logging is sent to Seastar backend
diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc
new file mode 100644 (file)
index 0000000..247f374
--- /dev/null
@@ -0,0 +1,131 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tri_mutex.h"
+
+seastar::future<> tri_mutex::lock_for_read()
+{
+  if (try_lock_for_read()) {
+    return seastar::make_ready_future<>();
+  }
+  waiters.emplace_back(seastar::promise<>(), type_t::read);
+  return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_read() noexcept
+{
+  if (!writers && !exclusively_used && waiters.empty()) {
+    ++readers;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void tri_mutex::unlock_for_read()
+{
+  assert(readers > 0);
+  if (--readers == 0) {
+    wake();
+  }
+}
+
+seastar::future<> tri_mutex::lock_for_write(bool greedy)
+{
+  if (try_lock_for_write(greedy)) {
+    return seastar::make_ready_future<>();
+  }
+  waiters.emplace_back(seastar::promise<>(), type_t::write);
+  return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_write(bool greedy) noexcept
+{
+  if (!readers && !exclusively_used) {
+    if (greedy || waiters.empty()) {
+      ++writers;
+      return true;
+    }
+  }
+  return false;
+}
+
+void tri_mutex::unlock_for_write()
+{
+  assert(writers > 0);
+  if (--writers == 0) {
+    wake();
+  }
+}
+
+// for exclusive users
+seastar::future<> tri_mutex::lock_for_excl()
+{
+  if (try_lock_for_excl()) {
+    return seastar::make_ready_future<>();
+  }
+  waiters.emplace_back(seastar::promise<>(), type_t::exclusive);
+  return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_excl() noexcept
+{
+  if (!readers && !writers && !exclusively_used) {
+    exclusively_used = true;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void tri_mutex::unlock_for_excl()
+{
+  assert(exclusively_used);
+  exclusively_used = false;
+  wake();
+}
+
+bool tri_mutex::is_acquired() const
+{
+  if (readers) {
+    return true;
+  } else if (writers) {
+    return true;
+  } else if (exclusively_used) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void tri_mutex::wake()
+{
+  assert(!readers && !writers && !exclusively_used);
+  type_t type = type_t::none;
+  while (!waiters.empty()) {
+    auto& waiter = waiters.front();
+    if (type == type_t::exclusive) {
+      break;
+    } if (type == type_t::none) {
+      type = waiter.type;
+    } else if (type != waiter.type) {
+      // to be woken in the next batch
+      break;
+    }
+    switch (type) {
+    case type_t::read:
+      ++readers;
+      break;
+    case type_t::write:
+      ++writers;
+      break;
+    case type_t::exclusive:
+      exclusively_used = true;
+      break;
+    default:
+      assert(0);
+    }
+    waiter.pr.set_value();
+    waiters.pop_front();
+  }
+}
diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h
new file mode 100644 (file)
index 0000000..63ca81a
--- /dev/null
@@ -0,0 +1,57 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/future.hh>
+#include <seastar/core/circular_buffer.hh>
+
+/// shared/exclusive mutual exclusion
+///
+/// similar to seastar::shared_mutex, but instead of two kinds of waiters,
+/// tri_mutex keeps track of three kinds of them:
+/// - readers
+/// - writers
+/// - exclusive users
+/// and unlike shared_mutex, tri_mutex has two kinds of shared users of the lock:
+/// - readers
+/// - writers, which are not mutually exclusive with each other
+/// the exclusive user is like the writer in shared_mutex.
+class tri_mutex {
+public:
+  tri_mutex() = default;
+  // for shared readers
+  seastar::future<> lock_for_read();
+  bool try_lock_for_read() noexcept;
+  void unlock_for_read();
+  // for shared writers
+  seastar::future<> lock_for_write(bool greedy);
+  bool try_lock_for_write(bool greedy) noexcept;
+  void unlock_for_write();
+  // for exclusive users
+  seastar::future<> lock_for_excl();
+  bool try_lock_for_excl() noexcept;
+  void unlock_for_excl();
+
+  bool is_acquired() const;
+
+private:
+  void wake();
+  unsigned readers = 0;
+  unsigned writers = 0;
+  bool exclusively_used = false;
+  enum class type_t : uint8_t {
+    read,
+    write,
+    exclusive,
+    none,
+  };
+  struct waiter_t {
+    waiter_t(seastar::promise<>&& pr, type_t type)
+      : pr(std::move(pr)), type(type)
+    {}
+    seastar::promise<> pr;
+    type_t type;
+  };
+  seastar::circular_buffer<waiter_t> waiters;
+};
index 80b64cd55359505bfec5cd2dc45017b4b607a05e..3bd6073d9e6243f6af883407cb92bc775e484532 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "common/intrusive_lru.h"
 #include "osd/object_state.h"
+#include "crimson/common/tri_mutex.h"
 #include "crimson/osd/osd_operation.h"
 
 namespace ceph {
@@ -84,33 +85,8 @@ public:
   }
 
 private:
-  RWState rwstate;
-  seastar::shared_mutex wait_queue;
-  std::optional<seastar::shared_promise<>> wake;
-
-  template <typename F>
-  seastar::future<> with_queue(F &&f) {
-    return wait_queue.lock().then([this, f=std::move(f)] {
-      ceph_assert(!wake);
-      return seastar::repeat([this, f=std::move(f)]() {
-       if (f()) {
-         wait_queue.unlock();
-         return seastar::make_ready_future<seastar::stop_iteration>(
-           seastar::stop_iteration::yes);
-       } else {
-         rwstate.inc_waiters();
-         wake = seastar::shared_promise<>();
-         return wake->get_shared_future().then([this, f=std::move(f)] {
-           wake = std::nullopt;
-           rwstate.dec_waiters(1);
-           return seastar::make_ready_future<seastar::stop_iteration>(
-             seastar::stop_iteration::no);
-         });
-       }
-      });
-    });
-  }
-
+  tri_mutex rwlock;
+  bool recovery_read_marker = false;
 
   const char *get_type_name() const final {
     return "ObjectContext";
@@ -122,23 +98,18 @@ private:
     Operation *op,
     LockF &&lockf) {
     return op->with_blocking_future(
-      make_blocking_future(with_queue(std::forward<LockF>(lockf))));
+      make_blocking_future(std::forward<LockF>(lockf)));
   }
 
-  template <typename UnlockF>
-  void put_lock(
-    UnlockF &&unlockf) {
-    if (unlockf() && wake) wake->set_value();
-  }
 public:
   seastar::future<> get_lock_type(Operation *op, RWState::State type) {
     switch (type) {
     case RWState::RWWRITE:
-      return get_lock(op, [this] { return rwstate.get_write_lock(); });
+      return get_lock(op, rwlock.lock_for_write(false));
     case RWState::RWREAD:
-      return get_lock(op, [this] { return rwstate.get_read_lock(); });
+      return get_lock(op, rwlock.lock_for_read());
     case RWState::RWEXCL:
-      return get_lock(op, [this] { return rwstate.get_excl_lock(); });
+      return get_lock(op, rwlock.lock_for_excl());
     case RWState::RWNONE:
       return seastar::make_ready_future<>();
     default:
@@ -150,11 +121,11 @@ public:
   void put_lock_type(RWState::State type) {
     switch (type) {
     case RWState::RWWRITE:
-      return put_lock([this] { return rwstate.put_write(); });
+      return rwlock.unlock_for_write();
     case RWState::RWREAD:
-      return put_lock([this] { return rwstate.put_read(); });
+      return rwlock.unlock_for_read();
     case RWState::RWEXCL:
-      return put_lock([this] { return rwstate.put_excl(); });
+      return rwlock.unlock_for_excl();
     case RWState::RWNONE:
       return;
     default:
@@ -165,66 +136,62 @@ public:
 
   void degrade_excl_to(RWState::State type) {
     // assume we already hold an excl lock
-    bool put = rwstate.put_excl();
+    rwlock.unlock_for_excl();
     bool success = false;
     switch (type) {
     case RWState::RWWRITE:
-      success = rwstate.get_write_lock();
+      success = rwlock.try_lock_for_write(false);
       break;
     case RWState::RWREAD:
-      success = rwstate.get_read_lock();
+      success = rwlock.try_lock_for_read();
       break;
     case RWState::RWEXCL:
-      success = rwstate.get_excl_lock();
+      success = rwlock.try_lock_for_excl();
       break;
     case RWState::RWNONE:
       success = true;
       break;
     default:
-      ceph_abort_msg("invalid lock type");
+      assert(0 == "invalid lock type");
       break;
     }
     ceph_assert(success);
-    if (put && wake) {
-      wake->set_value();
-    }
   }
 
-  bool empty() const { return rwstate.empty(); }
+  bool empty() const {
+    return !rwlock.is_acquired();
+  }
+  bool is_request_pending() const {
+    return rwlock.is_acquired();
+  }
 
   template <typename F>
   seastar::future<> get_write_greedy(Operation *op) {
-    return get_lock(op, [this] { return rwstate.get_write_lock(true); });
+    return get_lock(op, [this] {
+      return rwlock.lock_for_write(true);
+    });
   }
 
-  bool try_get_read_lock() {
-    return rwstate.get_read_lock();
-  }
-  void drop_read() {
-    return put_lock_type(RWState::RWREAD);
-  }
   bool get_recovery_read() {
-    return rwstate.get_recovery_read();
+    if (rwlock.try_lock_for_read()) {
+      recovery_read_marker = true;
+      return true;
+    } else {
+      return false;
+    }
   }
   seastar::future<> wait_recovery_read() {
-    if (rwstate.get_recovery_read()) {
-      return seastar::make_ready_future<>();
-    }
-    return with_queue([this] {
-      return rwstate.get_recovery_read();
+    return rwlock.lock_for_read().then([this] {
+      recovery_read_marker = true;
     });
   }
   void drop_recovery_read() {
-    ceph_assert(rwstate.recovery_read_marker);
-    drop_read();
-    rwstate.recovery_read_marker = false;
+    assert(recovery_read_marker);
+    rwlock.unlock_for_read();
+    recovery_read_marker = false;
   }
   bool maybe_get_excl() {
-    return rwstate.get_excl_lock();
-  }
-
-  bool is_request_pending() const {
-    return !rwstate.empty();
+    return rwlock.try_lock_for_excl();
   }
 };
 using ObjectContextRef = ObjectContext::Ref;