MDSCacheObject waiting interface accepts a wait mask to queue waiters.
The mask is used to prioritize waiters, so lower absolute mask values will be invoked first.
SimpleLock used to create a different wait mask per lock type, thus prioritizing locks,
but the approach proved to be unfair and #8965 changed the waiting to be FIFO
per wanted wait bit. This change made the spreading of lock wait masks per lock type
redundant.
Until now it wasn't an issue, but adding a new lock type we found out that the 64 bit mask
space was already exhausted given that each lock used up 4 bits from the mask
Further, there was a magic number `8` added to the lock mask shift offset,
which apparently was meant to accommodate for custom high-priority wait bits
of the lock's parent object, but this wasn't properly communicated via interfaces
and caused an overlap of the first lock's WAIT_RD bit with the fourth private
wait bit of CInode::WAIT_FLOCK;
This change optimizes the usage of the available wait tag 64 bit space,
aiming to maintain backward compatibility with the current implementation.
The approach is to split the 64bits of the wait tag into a 16 bit ID and
a 48 bit MASK fields. The ID field will be matched literally, while the MASK
field will be matched by an intersection.
With this apprach we can encode the lock ordinal into the ID field and use just
four bits from the MASK field to distinguish between the different lock wait flags.
Since the ID field occupies the most significant 2 bytes, the comparision of
different lock tags will yield the same results as it does today, when higher
lock ids were occupying higher bit offsets.
Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
}
-void CDentry::add_waiter(uint64_t tag, MDSContext *c)
+void CDentry::add_waiter(WaitTag tag, MDSContext *c, bool ordered)
{
// wait on the directory?
if (tag & (WAIT_UNFREEZE|WAIT_SINGLEAUTH)) {
dir->add_waiter(tag, c);
return;
}
- MDSCacheObject::add_waiter(tag, c);
+ MDSCacheObject::add_waiter(tag, c, ordered);
}
// -- wait --
//static const int WAIT_LOCK_OFFSET = 8;
- void add_waiter(uint64_t tag, MDSContext *c) override;
+ void add_waiter(WaitTag tag, MDSContext *c, bool ordered = false) override;
bool is_lt(const MDSCacheObject *r) const override {
return *this < *static_cast<const CDentry*>(r);
put(PIN_DNWAITER);
}
-void CDir::add_waiter(uint64_t tag, MDSContext *c)
+void CDir::add_waiter(WaitTag tag, MDSContext *c, bool ordered)
{
// hierarchical?
if (!is_subtree_root()) {
// try parent
dout(10) << "add_waiter " << std::hex << tag << std::dec << " " << c << " should be ATSUBTREEROOT, " << *this << " is not root, trying parent" << dendl;
- inode->parent->dir->add_waiter(tag, c);
+ inode->parent->dir->add_waiter(tag, c, ordered);
return;
}
}
/* NOTE: this checks dentry waiters too */
-void CDir::take_waiting(uint64_t mask, MDSContext::vec& ls)
+void CDir::take_waiting(WaitTag tag, MDSContext::vec& ls)
{
- if ((mask & WAIT_DENTRY) && !waiting_on_dentry.empty()) {
+ if ((tag & WAIT_DENTRY) && !waiting_on_dentry.empty()) {
// take all dentry waiters
for (const auto &p : waiting_on_dentry) {
dout(10) << "take_waiting dentry " << p.first.name
}
// waiting
- MDSCacheObject::take_waiting(mask, ls);
+ MDSCacheObject::take_waiting(tag, ls);
}
-void CDir::finish_waiting(uint64_t mask, int result)
+void CDir::finish_waiting(WaitTag tag, int result)
{
- dout(11) << __func__ << " mask " << hex << mask << dec << " result " << result << " on " << *this << dendl;
+ dout(11) << __func__ << tag << " result " << result << " on " << *this << dendl;
MDSContext::vec finished;
- take_waiting(mask, finished);
+ take_waiting(tag, finished);
if (result < 0)
finish_contexts(g_ceph_context, finished, result);
else
static const unsigned EXPORT_NONCE = 1;
- // -- wait masks --
- static const uint64_t WAIT_DENTRY = (1<<0); // wait for item to be in cache
- static const uint64_t WAIT_COMPLETE = (1<<1); // wait for complete dir contents
- static const uint64_t WAIT_FROZEN = (1<<2); // auth pins removed
- static const uint64_t WAIT_CREATED = (1<<3); // new dirfrag is logged
+ constexpr static WaitTag const WAIT_ID_CDIR = WaitTag(1);
- static const int WAIT_DNLOCK_OFFSET = 4;
+ // -- wait masks --
+ constexpr static WaitTag const WAIT_DENTRY = WAIT_ID_CDIR.bit_mask(0); // wait for item to be in cache
+ constexpr static WaitTag const WAIT_COMPLETE = WAIT_ID_CDIR.bit_mask(1); // wait for complete dir contents
+ constexpr static WaitTag const WAIT_FROZEN = WAIT_ID_CDIR.bit_mask(2); // auth pins removed
+ constexpr static WaitTag const WAIT_CREATED = WAIT_ID_CDIR.bit_mask(3); // new dirfrag is logged
+ const static int WAIT_BITS = 4;
- static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
- static const uint64_t WAIT_ATSUBTREEROOT = (WAIT_SINGLEAUTH);
+ constexpr static WaitTag const WAIT_ANY_MASK = WAIT_ID_CDIR | (uint64_t)((1 << WAIT_BITS) - 1);
+ constexpr static WaitTag const& WAIT_ATSUBTREEROOT = MDSCacheObject::WAIT_SINGLEAUTH;
// -- dump flags --
static const int DUMP_PATH = (1 << 0);
void add_dentry_waiter(std::string_view dentry, snapid_t snap, MDSContext *c);
void take_dentry_waiting(std::string_view dentry, snapid_t first, snapid_t last, MDSContext::vec& ls);
- void add_waiter(uint64_t mask, MDSContext *c) override;
- void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters
- void finish_waiting(uint64_t mask, int result = 0); // ditto
+ void add_waiter(WaitTag mask, MDSContext *c, bool ordered = false) override;
+ void take_waiting(WaitTag mask, MDSContext::vec& ls) override; // may include dentry waiters
+ void finish_waiting(WaitTag mask, int result = 0); // ditto
// -- import/export --
mds_rank_t get_export_pin(bool inherit=true) const;
}
}
-void CInode::add_waiter(uint64_t tag, MDSContext *c)
+void CInode::add_waiter(WaitTag tag, MDSContext *c, bool ordered)
{
- dout(10) << __func__ << " tag " << std::hex << tag << std::dec << " " << c
+ dout(10) << __func__ << tag << c
<< " !ambig " << !state_test(STATE_AMBIGUOUSAUTH)
<< " !frozen " << !is_frozen_inode()
<< " !freezing " << !is_freezing_inode()
return;
}
dout(15) << "taking waiter here" << dendl;
- MDSCacheObject::add_waiter(tag, c);
+ MDSCacheObject::add_waiter(tag, c, ordered);
}
-void CInode::take_waiting(uint64_t mask, MDSContext::vec& ls)
+void CInode::take_waiting(WaitTag tag, MDSContext::vec& ls)
{
- if ((mask & WAIT_DIR) && !waiting_on_dir.empty()) {
+ if ((tag & WAIT_DIR) && !waiting_on_dir.empty()) {
// take all dentry waiters
while (!waiting_on_dir.empty()) {
auto it = waiting_on_dir.begin();
}
// waiting
- MDSCacheObject::take_waiting(mask, ls);
+ MDSCacheObject::take_waiting(tag, ls);
}
void CInode::maybe_finish_freeze_inode()
*/
static const int MASK_STATE_REPLICATED = STATE_RANDEPHEMERALPIN;
+ constexpr static WaitTag const WAIT_ID_CINODE = WaitTag(0);
+
// -- waiters --
- static const uint64_t WAIT_DIR = (1<<0);
- static const uint64_t WAIT_FROZEN = (1<<1);
- static const uint64_t WAIT_TRUNC = (1<<2);
- static const uint64_t WAIT_FLOCK = (1<<3);
-
- static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
+ constexpr static WaitTag const WAIT_DIR = WAIT_ID_CINODE.bit_mask(0);
+ constexpr static WaitTag const WAIT_FROZEN = WAIT_ID_CINODE.bit_mask(1);
+ constexpr static WaitTag const WAIT_TRUNC = WAIT_ID_CINODE.bit_mask(2);
+ constexpr static WaitTag const WAIT_FLOCK = WAIT_ID_CINODE.bit_mask(3);
+ static const uint64_t WAIT_BITS = 4;
+
+ constexpr static WaitTag const WAIT_ANY_MASK = WAIT_ID_CINODE | (uint64_t)((1 << WAIT_BITS) - 1);
// misc
static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export
bool is_waiting_for_dir(frag_t fg) {
return waiting_on_dir.count(fg);
}
- void add_waiter(uint64_t tag, MDSContext *c) override;
- void take_waiting(uint64_t tag, MDSContext::vec& ls) override;
+ void add_waiter(WaitTag tag, MDSContext *c, bool ordered = false) override;
+ void take_waiting(WaitTag tag, MDSContext::vec& ls) override;
// -- encode/decode helpers --
void _encode_base(ceph::buffer::list& bl, uint64_t features);
}
// wait!
- int wait_on;
+ WaitTag wait_on;
if (lock->get_parent()->is_auth() && lock->is_stable())
wait_on = SimpleLock::WAIT_RD;
else
dout(7) << "handle_mds_recovery mds." << who << dendl;
// exclude all discover waiters. kick_discovers() will do the job
- static const uint64_t i_mask = CInode::WAIT_ANY_MASK & ~CInode::WAIT_DIR;
- static const uint64_t d_mask = CDir::WAIT_ANY_MASK & ~CDir::WAIT_DENTRY;
+ static const WaitTag i_mask = CInode::WAIT_ANY_MASK & ~CInode::WAIT_DIR;
+ static const WaitTag d_mask = CDir::WAIT_ANY_MASK & ~CDir::WAIT_DENTRY;
MDSContext::vec waiters;
mds->mdlog->submit_entry(le, new C_MDC_TruncateLogged(this, in, mut));
// flush immediately if there are readers/writers waiting
- if (in->is_waiter_for(CInode::WAIT_TRUNC) ||
+ if (in->has_waiter_for(CInode::WAIT_TRUNC) ||
(in->get_caps_wanted() & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
mds->mdlog->flush();
}
}
}
-void MDSCacheObject::finish_waiting(uint64_t mask, int result) {
- MDSContext::vec finished;
- take_waiting(mask, finished);
- finish_contexts(g_ceph_context, finished, result);
-}
-
void MDSCacheObject::dump(ceph::Formatter *f) const
{
f->dump_bool("is_auth", is_auth());
if (state_test(STATE_REJOINUNDEF))
f->dump_string("state", "rejoinundef");
}
-
-bool MDSCacheObject::is_waiter_for(uint64_t mask, uint64_t min) {
- if (!min) {
- min = mask;
- while (min & (min-1)) // if more than one bit is set
- min &= min-1; // clear LSB
- }
- for (auto p = waiting.lower_bound(min); p != waiting.end(); ++p) {
- if (p->first & mask) return true;
- if (p->first > mask) return false;
- }
- return false;
-}
-
-void MDSCacheObject::take_waiting(uint64_t mask, MDSContext::vec& ls) {
- if (waiting.empty()) return;
-
- // process ordered waiters in the same order that they were added.
- std::map<uint64_t, MDSContext*> ordered_waiters;
-
- for (auto it = waiting.begin(); it != waiting.end(); ) {
- if (it->first & mask) {
- if (it->second.first > 0) {
- ordered_waiters.insert(it->second);
- } else {
- ls.push_back(it->second.second);
- }
-// pdout(10,g_conf()->debug_mds) << (mdsco_db_line_prefix(this))
-// << "take_waiting mask " << hex << mask << dec << " took " << it->second
-// << " tag " << hex << it->first << dec
-// << " on " << *this
-// << dendl;
- waiting.erase(it++);
- } else {
-// pdout(10,g_conf()->debug_mds) << "take_waiting mask " << hex << mask << dec << " SKIPPING " << it->second
-// << " tag " << hex << it->first << dec
-// << " on " << *this
-// << dendl;
- ++it;
- }
- }
- for (auto it = ordered_waiters.begin(); it != ordered_waiters.end(); ++it) {
- ls.push_back(it->second);
- }
- if (waiting.empty()) {
- put(PIN_WAITER);
- waiting.clear();
- }
-}
-
-uint64_t MDSCacheObject::last_wait_seq = 0;
#include "MDSContext.h"
#include "include/elist.h"
+#include "MDSWaitable.h"
+
#define MDS_REF_SET // define me for improved debug output, sanity checking
//#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks
//#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags
MDSCacheObject *object;
};
-class MDSCacheObject {
+class MDSCacheObject: public MDSWaitable<MDSContext> {
public:
typedef mempool::mds_co::compact_map<mds_rank_t,unsigned> replica_map_type;
const static int STATE_REJOINING = (1<<27); // replica has not joined w/ primary copy
const static int STATE_REJOINUNDEF = (1<<26); // contents undefined.
+ constexpr static WaitTag const WAIT_ID_GLOBAL = WaitTag(WaitTag::ANY_ID); // lowest priority
// -- wait --
- const static uint64_t WAIT_ORDERED = (1ull<<61);
- const static uint64_t WAIT_SINGLEAUTH = (1ull<<60);
- const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE
+ constexpr static WaitTag const WAIT_SINGLEAUTH = WAIT_ID_GLOBAL.bit_mask(WaitTag::MASK_BITS-1);
+ constexpr static WaitTag const WAIT_UNFREEZE = WAIT_ID_GLOBAL.bit_mask(WaitTag::MASK_BITS-2); // pka AUTHPINNABLE
elist<MDSCacheObject*>::item item_scrub; // for scrub inode or dir
unsigned get_replica_nonce() const { return replica_nonce; }
void set_replica_nonce(unsigned n) { replica_nonce = n; }
- bool is_waiter_for(uint64_t mask, uint64_t min=0);
- virtual void add_waiter(uint64_t mask, MDSContext *c) {
- if (waiting.empty())
- get(PIN_WAITER);
+ void add_waiter(WaitTag tag, MDSContext* c, bool ordered = false) override
+ {
+ if (waiting_empty()) {
+ get(PIN_WAITER);
+ }
+ MDSWaitable::add_waiter(tag, c, ordered);
+ }
- uint64_t seq = 0;
- if (mask & WAIT_ORDERED) {
- seq = ++last_wait_seq;
- mask &= ~WAIT_ORDERED;
+ virtual void take_waiting(WaitTag tag, MDSContext::vec& ls)
+ {
+ MDSWaitable::take_waiting(tag, std::back_inserter(ls));
+ if (waiting_empty()) {
+ put(PIN_WAITER);
}
- waiting.insert(std::pair<uint64_t, std::pair<uint64_t, MDSContext*> >(
- mask,
- std::pair<uint64_t, MDSContext*>(seq, c)));
-// pdout(10,g_conf()->debug_mds) << (mdsco_db_line_prefix(this))
-// << "add_waiter " << hex << mask << dec << " " << c
-// << " on " << *this
-// << dendl;
-
}
- virtual void take_waiting(uint64_t mask, MDSContext::vec& ls);
- void finish_waiting(uint64_t mask, int result = 0);
+ void finish_waiting(WaitTag tag, int result = 0)
+ {
+ MDSContext::vec finished;
+ take_waiting(tag, finished);
+ finish_contexts(g_ceph_context, finished, result);
+ }
// ---------------------------------------------
// locking
// noop unless overloaded.
- virtual SimpleLock* get_lock(int type) { ceph_abort(); return 0; }
- virtual void set_object_info(MDSCacheObjectInfo &info) { ceph_abort(); }
- virtual void encode_lock_state(int type, ceph::buffer::list& bl) { ceph_abort(); }
- virtual void decode_lock_state(int type, const ceph::buffer::list& bl) { ceph_abort(); }
- virtual void finish_lock_waiters(int type, uint64_t mask, int r=0) { ceph_abort(); }
- virtual void add_lock_waiter(int type, uint64_t mask, MDSContext *c) { ceph_abort(); }
- virtual bool is_lock_waiting(int type, uint64_t mask) { ceph_abort(); return false; }
+ virtual SimpleLock* get_lock(int type)
+ {
+ ceph_abort();
+ return 0;
+ }
+ virtual void set_object_info(MDSCacheObjectInfo & info) { ceph_abort(); }
+ virtual void encode_lock_state(int type, ceph::buffer::list& bl) { ceph_abort(); }
+ virtual void decode_lock_state(int type, const ceph::buffer::list& bl) { ceph_abort(); }
+ virtual void finish_lock_waiters(int type, uint64_t mask, int r = 0) { ceph_abort(); }
+ virtual void add_lock_waiter(int type, uint64_t mask, MDSContext* c) { ceph_abort(); }
+ virtual bool is_lock_waiting(int type, uint64_t mask)
+ {
+ ceph_abort();
+ return false;
+ }
- virtual void clear_dirty_scattered(int type) { ceph_abort(); }
+ virtual void clear_dirty_scattered(int type) { ceph_abort(); }
- // ---------------------------------------------
- // ordering
- virtual bool is_lt(const MDSCacheObject *r) const = 0;
+ // ---------------------------------------------
+ // ordering
+ virtual bool is_lt(const MDSCacheObject* r) const = 0;
- // state
- protected:
- __u32 state = 0; // state bits
+ // state
+protected:
+ __u32 state = 0; // state bits
- // pins
- __s32 ref = 0; // reference count
+ // pins
+ __s32 ref = 0; // reference count
#ifdef MDS_REF_SET
mempool::mds_co::flat_map<int,int> ref_map;
#endif
// replication (across mds cluster)
unsigned replica_nonce = 0; // [replica] defined on replica
replica_map_type replica_map; // [auth] mds -> nonce
-
- // ---------------------------------------------
- // waiting
- private:
- mempool::mds_co::compact_multimap<uint64_t, std::pair<uint64_t, MDSContext*>> waiting;
- static uint64_t last_wait_seq;
-};
+ };
inline std::ostream& operator<<(std::ostream& out, const MDSCacheObject& o) {
o.print(out);
--- /dev/null
+#pragma once
+
+#include <cstdint>
+#include <stdexcept>
+#include <algorithm>
+#include <iterator>
+#include <concepts>
+
+#include "include/Context.h"
+#include "include/ceph_assert.h"
+#include "include/mempool.h"
+#include "include/types.h"
+
+#include "mdstypes.h"
+
+struct WaitTag {
+ static constexpr const int MASK_BITS = 48;
+ static constexpr const int ID_BITS = 16;
+ static constexpr const uint16_t ANY_ID = 0xffff;
+
+ static_assert(MASK_BITS + ID_BITS == 64);
+
+ static constexpr const uint64_t FULL_MASK = (uint64_t(1) << MASK_BITS)-1;
+
+ union {
+ uint64_t raw;
+ struct {
+ uint64_t mask : MASK_BITS;
+ uint64_t id : ID_BITS;
+ };
+ };
+
+ constexpr WaitTag(uint16_t id = ANY_ID)
+ : WaitTag(id, 0)
+ {
+ }
+ constexpr WaitTag(uint16_t id, uint64_t mask)
+ : mask(mask)
+ , id(id)
+ {
+ if ((mask & FULL_MASK) != mask) {
+ throw std::logic_error("mask too large");
+ }
+ }
+
+ WaitTag(WaitTag const& other) : raw(other.raw) { }
+
+ constexpr static WaitTag const from_raw(uint64_t raw) {
+ return WaitTag(raw >> MASK_BITS, raw & FULL_MASK);
+ }
+
+ constexpr static WaitTag const any(uint64_t mask = 0) {
+ return WaitTag(ANY_ID, mask);
+ }
+
+ constexpr WaitTag with_mask(uint64_t new_mask) const
+ {
+ return WaitTag(id, new_mask);
+ }
+
+ constexpr WaitTag or_mask(uint64_t or_mask) const
+ {
+ return WaitTag(id, mask | or_mask);
+ }
+
+ constexpr WaitTag and_mask(uint64_t and_mask) const
+ {
+ return WaitTag(id, mask & and_mask);
+ }
+
+ constexpr WaitTag bit_mask(int bit) const
+ {
+ return or_mask(uint64_t(1) << bit);
+ }
+
+ constexpr bool match(uint64_t raw) const
+ {
+ return match(from_raw(raw));
+ }
+
+ constexpr bool match(WaitTag const& other) const
+ {
+ return match_id(other) && (mask & other.mask);
+ }
+
+ constexpr bool match_id(WaitTag const& other) const
+ {
+ return (id == other.id || is_any_id() || other.is_any_id());
+ }
+
+ constexpr auto operator<=>(WaitTag const& other) const {
+ return raw <=> other.raw;
+ }
+
+ constexpr operator bool() const {
+ return mask != 0;
+ }
+
+ constexpr bool is_any_id() const {
+ return id == ANY_ID;
+ }
+};
+
+constexpr WaitTag operator~(WaitTag const& tag) {
+ return WaitTag(tag.id, ~tag.mask & WaitTag::FULL_MASK);
+}
+
+constexpr WaitTag operator|(WaitTag const& l, uint64_t mask) {
+ return l.or_mask(mask);
+}
+
+constexpr WaitTag operator|(WaitTag const& l, WaitTag const&r) {
+ if (!l.match_id(r)) {
+ throw std::logic_error("Can't combine tags with different IDs");
+ }
+ return WaitTag(std::min(l.id, r.id), l.mask | r.mask);
+}
+
+constexpr WaitTag operator&(WaitTag const& l, WaitTag const&r) {
+ if (!l.match_id(r)) {
+ throw std::logic_error("Can't combine tags with different IDs");
+ }
+ return WaitTag(std::min(l.id, r.id), l.mask & r.mask);
+}
+
+constexpr WaitTag operator&(WaitTag const& l, uint64_t mask) {
+ return l.and_mask(mask);
+}
+
+template <class CharT, class Traits>
+static std::basic_ostream<CharT, Traits>&
+operator<<(std::basic_ostream<CharT, Traits>& os, const WaitTag& tag)
+{
+ os << "WaitTag(";
+ if (tag.is_any_id()) {
+ os << "*";
+ } else {
+ os << tag.id;
+ }
+ os << ":" << std::hex << tag.mask << std::dec << ")";
+ return os;
+}
+
+
+template<std::derived_from<Context> C>
+struct MDSWaitable {
+ virtual void add_waiter(WaitTag tag, C* c, bool ordered = false)
+ {
+ uint64_t seq = 0;
+ if (ordered) {
+ seq = ++last_wait_seq;
+ }
+ waiting.insert({ tag, { seq, c } });
+ }
+
+ bool has_waiter_for(WaitTag tag)
+ {
+ auto it = MatchingIterator(waiting, tag);
+ return it != waiting.end();
+ }
+
+ template <std::output_iterator<C*> S>
+ void take_waiting(WaitTag tag, S &&sink)
+ {
+ if (waiting.empty())
+ return;
+
+ // process ordered waiters in the same order that they were added.
+ std::map<uint64_t, C*> ordered_waiters;
+
+ for (auto it = MatchingIterator(waiting, tag); it != waiting.end();) {
+ if (it->second.first > 0) {
+ ordered_waiters.insert(it->second);
+ } else {
+ *sink++ = it->second.second;
+ }
+ waiting.erase(it++);
+ }
+ for (auto it = ordered_waiters.begin(); it != ordered_waiters.end(); ++it) {
+ *sink++ = it->second;
+ }
+ }
+
+ bool waiting_empty() const {
+ return waiting.empty();
+ }
+
+ void waiting_clear() {
+ waiting.clear();
+ }
+
+ virtual ~MDSWaitable() { }
+
+ MDSWaitable() = default;
+ MDSWaitable(MDSWaitable<C> const& other) = default;
+ MDSWaitable(MDSWaitable<C> && other) = default;
+
+private:
+ using Waiters = mempool::mds_co::compact_multimap<WaitTag, std::pair<uint64_t, C*>>;
+ Waiters waiting;
+ uint64_t last_wait_seq = 0;
+
+ struct MatchingIterator {
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = std::pair<const WaitTag, std::pair<uint64_t, C*>>;
+ using pointer = value_type*;
+ using reference = value_type&;
+
+ Waiters & parent;
+ WaitTag tag;
+ typename Waiters::iterator current;
+
+ /// @brief Reduces any value to its most significant bit
+ /// @param value
+ /// @return a new value that has a single bit set at the position
+ /// of the input's msb
+ static uint64_t reduce_to_msb(uint64_t value) {
+ while (value & (value - 1)) // if more than one bit is set
+ value &= value - 1; // clear LSB
+ return value;
+ }
+
+ MatchingIterator(Waiters& parent, WaitTag tag)
+ : parent(parent)
+ , tag(tag)
+ {
+ auto min_mask = reduce_to_msb(tag.mask);
+
+ auto search_tag = WaitTag(tag.is_any_id() ? 0 : tag.id, min_mask);
+ current = parent.lower_bound(search_tag);
+ skip_to_matching();
+ }
+
+ MatchingIterator(MatchingIterator const& other) = default;
+
+ void skip_to_matching() {
+ while (true) {
+ if (current == parent.end()) {
+ return;
+ }
+ if (current->first.match(tag)) {
+ return;
+ }
+ if (!current->first.is_any_id() && current->first.id > tag.id) {
+ break;
+ }
+ ++current;
+ }
+ if (!tag.is_any_id()) {
+ current = parent.lower_bound(WaitTag::any());
+ skip_to_matching();
+ }
+ }
+
+ reference operator*() const { return *current; }
+ pointer operator->() { return current.operator->(); }
+
+ // Prefix increment
+ MatchingIterator& operator++()
+ {
+ ++current;
+ skip_to_matching();
+ return *this;
+ }
+
+ // Postfix increment
+ MatchingIterator operator++(int)
+ {
+ MatchingIterator tmp = *this;
+ ++(*this);
+ return tmp;
+ }
+
+ operator typename Waiters::iterator() { return current; }
+ operator typename Waiters::const_iterator() { return Waiters::const_iterator(current); }
+
+ friend bool operator==(const MatchingIterator& a, const MatchingIterator& b) { return a.current == b.current; };
+ friend bool operator==(const MatchingIterator& a, const typename Waiters::const_iterator& b) { return a.current == b; };
+ friend bool operator==(const MatchingIterator& a, const typename Waiters::iterator& b) { return a.current == b; };
+ friend bool operator!=(const MatchingIterator& a, const MatchingIterator& b) { return a.current != b.current; };
+ friend bool operator!=(const MatchingIterator& a, const typename Waiters::const_iterator& b) { return a.current != b; };
+ friend bool operator!=(const MatchingIterator& a, const typename Waiters::iterator& b) { return a.current != b; };
+ };
+};
f->close_section();
}
-int SimpleLock::get_wait_shift() const {
- switch (get_type()) {
- case CEPH_LOCK_DN: return 8;
- case CEPH_LOCK_DVERSION: return 8 + 1*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IAUTH: return 8 + 2*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_ILINK: return 8 + 3*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IDFT: return 8 + 4*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IFILE: return 8 + 5*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IVERSION: return 8 + 6*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IXATTR: return 8 + 7*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_ISNAP: return 8 + 8*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_INEST: return 8 + 9*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IFLOCK: return 8 +10*SimpleLock::WAIT_BITS;
- case CEPH_LOCK_IPOLICY: return 8 +11*SimpleLock::WAIT_BITS;
- default:
- ceph_abort();
- }
-}
-
int SimpleLock::get_cap_shift() const {
switch (get_type()) {
case CEPH_LOCK_IAUTH: return CEPH_CAP_SAUTH;
typedef boost::intrusive_ptr<MutationImpl> MutationRef;
struct LockType {
+ static const uint16_t WAIT_ID_OFFSET = 16;
explicit LockType(int t) : type(t) {
switch (type) {
case CEPH_LOCK_DN:
default:
sm = 0;
}
+
+ uint16_t lock_ord = LockType::WAIT_ID_OFFSET;
+ int lock_id = type;
+ while (!(lock_id & 1)) ++lock_ord, lock_id >>=1;
+ wait_id = WaitTag(lock_ord);
+ }
+
+ uint16_t ord() const {
+ return wait_id.id;
}
int type;
const sm_t *sm;
+ WaitTag wait_id;
};
class SimpleLock {
public:
// waiting
- static const uint64_t WAIT_RD = (1<<0); // to read
- static const uint64_t WAIT_WR = (1<<1); // to write
- static const uint64_t WAIT_XLOCK = (1<<2); // to xlock (** dup)
- static const uint64_t WAIT_STABLE = (1<<2); // for a stable state
- static const uint64_t WAIT_REMOTEXLOCK = (1<<3); // for a remote xlock
- static const int WAIT_BITS = 4;
- static const uint64_t WAIT_ALL = ((1<<WAIT_BITS)-1);
+ static constexpr WaitTag WAIT_RD = WaitTag(WaitTag::ANY_ID).bit_mask(0); // to read
+ static constexpr WaitTag WAIT_WR = WaitTag(WaitTag::ANY_ID).bit_mask(1); // to write
+ static constexpr WaitTag WAIT_XLOCK = WaitTag(WaitTag::ANY_ID).bit_mask(2); // to xlock (** dup)
+ static constexpr WaitTag WAIT_STABLE = WaitTag(WaitTag::ANY_ID).bit_mask(2); // for a stable state
+ static constexpr WaitTag WAIT_REMOTEXLOCK = WaitTag(WaitTag::ANY_ID).bit_mask(3); // for a remote xlock
+ static const int WAIT_BITS = 4;
+ static constexpr WaitTag WAIT_ALL = WaitTag(WaitTag::ANY_ID,(1<<WAIT_BITS)-1);
static std::string_view get_state_name(int n) {
switch (n) {
SimpleLock(MDSCacheObject *o, LockType *lt) :
type(lt),
parent(o)
- {}
+ {
+ }
virtual ~SimpleLock() {}
client_t get_excl_client() const {
int get_type() const { return type->type; }
const sm_t* get_sm() const { return type->sm; }
- int get_wait_shift() const;
int get_cap_shift() const;
int get_cap_mask() const;
void encode_locked_state(ceph::buffer::list& bl) {
parent->encode_lock_state(type->type, bl);
}
- void finish_waiters(uint64_t mask, int r=0) {
- parent->finish_waiting(mask << get_wait_shift(), r);
+
+ void finish_waiters(WaitTag mask, int r=0) {
+ parent->finish_waiting(type->wait_id | mask, r);
}
- void take_waiting(uint64_t mask, MDSContext::vec& ls) {
- parent->take_waiting(mask << get_wait_shift(), ls);
+ void take_waiting(WaitTag mask, MDSContext::vec& ls) {
+ parent->take_waiting(type->wait_id | mask, ls);
}
- void add_waiter(uint64_t mask, MDSContext *c) {
- parent->add_waiter((mask << get_wait_shift()) | MDSCacheObject::WAIT_ORDERED, c);
+ void add_waiter(WaitTag mask, MDSContext *c) {
+ parent->add_waiter(type->wait_id | mask, c, true);
}
- bool is_waiter_for(uint64_t mask) const {
- return parent->is_waiter_for(mask << get_wait_shift());
+ bool is_waiter_for(WaitTag mask) const {
+ return parent->has_waiter_for(type->wait_id | mask);
}
bool is_cached() const {
state = s;
if (is_stable())
- take_waiting(SimpleLock::WAIT_ALL, waiters);
+ take_waiting(type->wait_id | SimpleLock::WAIT_ALL, waiters);
}
bool is_stable() const {
protected:
// parent (what i lock)
- MDSCacheObject *parent;
+ MDSCacheObject * const parent;
// lock state
__s16 state = LOCK_SYNC;
add_ceph_unittest(unittest_mds_sessionfilter)
target_link_libraries(unittest_mds_sessionfilter mds osdc ceph-common global ${BLKID_LIBRARIES})
+# unittest_mds_waitable
+add_executable(unittest_mds_waitable
+ TestMDSWaitable.cc
+ $<TARGET_OBJECTS:unit-main>
+)
+add_ceph_unittest(unittest_mds_waitable)
+target_link_libraries(unittest_mds_waitable ceph-common global)
--- /dev/null
+
+#include "mds/MDSWaitable.h"
+#include "gtest/gtest.h"
+#include <random>
+#include <vector>
+#include <string>
+
+TEST(MDSWaitable, ConstexprWaitTags) {
+
+ // the next line fails due to mask being larger than supported
+ // constexpr auto a = WaitTag::any(WaitTag::FULL_MASK << 1);
+
+
+ // constexpr auto a = WaitTag(1, WaitTag::FULL_MASK);
+ // constexpr auto b = WaitTag(2, WaitTag::FULL_MASK);
+ // the below two lines fail because tag ids are different
+ //constexpr auto c = a | b;
+ //constexpr auto c = a & ~b;
+}
+
+TEST(MDSWaitable, WaitTags)
+{
+ std::random_device rd;
+ std::mt19937_64 gen(rd());
+
+ uint64_t raw1 = gen();
+
+ auto a = WaitTag::from_raw(raw1);
+
+ EXPECT_EQ((raw1 >> WaitTag::MASK_BITS), a.id);
+ EXPECT_EQ((raw1 & WaitTag::FULL_MASK), a.mask);
+ EXPECT_EQ(raw1, a.raw);
+
+ uint64_t mask2 = gen() & WaitTag::FULL_MASK;
+ uint64_t raw2 = (raw1 & ~WaitTag::FULL_MASK) | mask2;
+
+ auto b = a | mask2;
+ EXPECT_EQ(raw1 | raw2, b.raw);
+
+ auto c = a & mask2;
+ EXPECT_EQ(raw1 & raw2, c.raw);
+
+ auto d = b & c;
+ EXPECT_EQ(((raw1 | raw2) & raw1 & raw2) & WaitTag::FULL_MASK, d.mask);
+}
+
+struct TagContext : public Context {
+ TagContext(WaitTag const tag)
+ : tag(tag)
+ {
+ }
+ void finish(int r) override
+ {
+ }
+
+ void complete(int r) override {
+ Context::complete(r);
+ }
+ const WaitTag tag;
+};
+
+static void add_waiter(MDSWaitable<TagContext>& w, WaitTag tag) {
+ w.add_waiter(tag, new TagContext(tag));
+}
+
+TEST(MDSWaitable, AddTake)
+{
+ WaitTag any0 = WaitTag::any().bit_mask(0);
+ WaitTag any1 = WaitTag::any().bit_mask(1);
+ WaitTag any2 = WaitTag::any().bit_mask(2);
+ WaitTag one0 = WaitTag(1).bit_mask(0);
+ WaitTag one1 = WaitTag(1).bit_mask(1);
+ WaitTag two0 = WaitTag(2).bit_mask(0);
+ WaitTag two1 = WaitTag(2).bit_mask(1);
+ WaitTag tre1 = WaitTag(3).bit_mask(1);
+
+ MDSWaitable<TagContext> setup;
+
+ EXPECT_TRUE(setup.waiting_empty());
+
+ add_waiter(setup, any0);
+ add_waiter(setup, any1);
+ add_waiter(setup, one0);
+ add_waiter(setup, one1);
+ add_waiter(setup, two0);
+ add_waiter(setup, two1);
+ add_waiter(setup, one1 | any0);
+ add_waiter(setup, one1 | one0);
+ add_waiter(setup, two1 | any0);
+ add_waiter(setup, two1 | two0);
+
+ EXPECT_FALSE(setup.waiting_empty());
+
+ EXPECT_TRUE(setup.has_waiter_for(any0));
+ EXPECT_TRUE(setup.has_waiter_for(any1));
+ EXPECT_TRUE(setup.has_waiter_for(one0));
+ EXPECT_TRUE(setup.has_waiter_for(one1));
+ EXPECT_TRUE(setup.has_waiter_for(one1 | any0));
+ EXPECT_TRUE(setup.has_waiter_for(one1 | one0));
+ EXPECT_TRUE(setup.has_waiter_for(tre1)); // because of any1
+ EXPECT_FALSE(setup.has_waiter_for(any2));
+
+ {
+ auto w = setup;
+ std::vector<TagContext*> cc;
+ w.take_waiting(one0, std::back_inserter(cc));
+ EXPECT_EQ(4, cc.size());
+ EXPECT_FALSE(w.has_waiter_for(one0));
+ }
+
+ {
+ auto w = setup;
+ std::vector<TagContext*> cc;
+ w.take_waiting(two1, std::back_inserter(cc));
+ EXPECT_EQ(4, cc.size());
+ EXPECT_FALSE(w.has_waiter_for(two1));
+ }
+
+ {
+ auto w = setup;
+ std::vector<TagContext*> cc;
+ w.take_waiting(any0, std::back_inserter(cc));
+ EXPECT_EQ(7, cc.size());
+ EXPECT_FALSE(w.has_waiter_for(two0));
+ EXPECT_FALSE(w.has_waiter_for(one0));
+ }
+}