From 9b419b91f1be9c18151b3378bf1a0498fb856cbc Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Mon, 30 Sep 2019 14:20:17 +0800
Subject: [PATCH] mds: delegate lock cache to client

Define cap bits for async dir operations. The lock cache for a given type
of dir operation can be delegated to a client through the cap mechanism.
As long as the client holds the corresponding cap, dir operation requests
from that client can use the lock cache. If the MDS wants to invalidate a
lock cache, it must first revoke the corresponding cap from the client.

Signed-off-by: "Yan, Zheng"
---
 src/include/ceph_fs.h |   9 +++-
 src/mds/CDir.h        |   3 ++
 src/mds/CInode.cc     |   6 ++-
 src/mds/Capability.h  |  17 +++--
 src/mds/Locker.cc     | 108 ++++++++++++++++++++++++++++++++++--------
 src/mds/Locker.h      |   2 +
 6 files changed, 118 insertions(+), 27 deletions(-)

diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index c941e12d4b4..4dd8b97413c 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -763,7 +763,7 @@ int ceph_flags_to_mode(int flags);
 #define CEPH_CAP_LINK_EXCL     (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
 #define CEPH_CAP_XATTR_SHARED  (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
 #define CEPH_CAP_XATTR_EXCL    (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
-#define CEPH_CAP_FILE(x)       (x << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE(x)       ((x) << CEPH_CAP_SFILE)
 #define CEPH_CAP_FILE_SHARED   (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
 #define CEPH_CAP_FILE_EXCL     (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
 #define CEPH_CAP_FILE_CACHE    (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
@@ -818,6 +818,13 @@ int ceph_flags_to_mode(int flags);
 #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
 			CEPH_LOCK_IXATTR)
 
+/* cap masks for async dir operations */
+#define CEPH_CAP_DIR_CREATE    CEPH_CAP_FILE_CACHE
+#define CEPH_CAP_DIR_UNLINK    CEPH_CAP_FILE_RD
+#define CEPH_CAP_ANY_DIR_OPS   (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | \
+				CEPH_CAP_FILE_WREXTEND | CEPH_CAP_FILE_LAZYIO)
+
+
 int ceph_caps_for_mode(int mode);
 
 enum {
diff --git a/src/mds/CDir.h b/src/mds/CDir.h
index 43ebfcb53c8..c72fb811404 100644
--- a/src/mds/CDir.h
+++ b/src/mds/CDir.h
@@ -581,6 +581,9 @@ public:
   bool is_any_freezing_or_frozen_inode() const {
     return num_frozen_inodes || !freezing_inodes.empty();
   }
+  bool is_auth_pinned_by_lock_cache() const {
+    return frozen_inode_suppressed;
+  }
   void disable_frozen_inode() {
     ceph_assert(num_frozen_inodes == 0);
     frozen_inode_suppressed++;
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index f45617553c1..c2ced918915 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -3442,7 +3442,11 @@ int CInode::get_caps_allowed_for_client(Session *session, Capability *cap,
     allowed = get_caps_allowed_by_type(CAP_ANY);
   }
 
-  if (!is_dir()) {
+  if (is_dir()) {
+    allowed &= ~CEPH_CAP_ANY_DIR_OPS;
+    if (cap && (allowed & CEPH_CAP_FILE_EXCL))
+      allowed |= cap->get_lock_cache_allowed();
+  } else {
     if (file_i->inline_data.version == CEPH_INLINE_NONE &&
 	file_i->layout.pool_ns.empty()) {
       // noop
diff --git a/src/mds/Capability.h b/src/mds/Capability.h
index a20685e2456..218eeb40fce 100644
--- a/src/mds/Capability.h
+++ b/src/mds/Capability.h
@@ -182,8 +182,8 @@ public:
     inc_last_seq();
     return last_sent;
   }
-  void confirm_receipt(ceph_seq_t seq, unsigned caps) {
-    bool was_revoking = (_issued & ~_pending);
+  int confirm_receipt(ceph_seq_t seq, unsigned caps) {
+    int was_revoking = (_issued & ~_pending);
     if (seq == last_sent) {
       _revokes.clear();
       _issued = caps;
@@ -208,7 +208,7 @@ public:
      item_client_revoking_caps.remove_myself();
      maybe_clear_notable();
    }
-    //check_rdcaps_list();
+    return was_revoking & ~_issued; // return revoked
   }
 
   // we may get a release racing with revocations, which means our revokes will be ignored
   // by the client.  clean them out of our _revokes history so we don't wait on them.
@@ -341,9 +341,10 @@ public:
     set_wanted(wanted() | otherwanted);
   }
 
-  void revoke() {
+  int revoke() {
     if (revoking())
-      confirm_receipt(last_sent, pending());
+      return confirm_receipt(last_sent, pending());
+    return 0;
   }
 
   // serializers
@@ -364,6 +365,10 @@ public:
   xlist<Capability*>::item item_client_revoking_caps;
 
   elist<MDLockCache*> lock_caches;
+  int get_lock_cache_allowed() const { return lock_cache_allowed; }
+  void set_lock_cache_allowed(int c) { lock_cache_allowed |= c; }
+  void clear_lock_cache_allowed(int c) { lock_cache_allowed &= ~c; }
+
 private:
   void calc_issued() {
     _issued = _pending;
@@ -401,6 +406,8 @@ private:
   int suppress = 0;
   unsigned state = 0;
 
+
+  int lock_cache_allowed = 0;
 };
 
 WRITE_CLASS_ENCODER(Capability::Export)
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index a6a19600420..6d810fffc02 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -795,15 +795,58 @@ void Locker::put_lock_cache(MDLockCache* lock_cache)
   mds->queue_waiter(new C_MDL_DropCache(this, lock_cache));
 }
 
+int Locker::get_cap_bit_for_lock_cache(int op)
+{
+  switch(op) {
+  case CEPH_MDS_OP_CREATE:
+    return CEPH_CAP_DIR_CREATE;
+  case CEPH_MDS_OP_UNLINK:
+    return CEPH_CAP_DIR_UNLINK;
+  default:
+    ceph_assert(0 == "unsupported operation");
+    return 0;
+  }
+}
+
 void Locker::invalidate_lock_cache(MDLockCache *lock_cache)
 {
   ceph_assert(lock_cache->item_cap_lock_cache.is_on_list());
-  ceph_assert(!lock_cache->invalidating);
-  lock_cache->invalidating = true;
-  lock_cache->detach_all();
-  // XXX check issued caps
-  lock_cache->item_cap_lock_cache.remove_myself();
-  put_lock_cache(lock_cache);
+  if (lock_cache->invalidating) {
+    ceph_assert(!lock_cache->client_cap);
+  } else {
+    lock_cache->invalidating = true;
+    lock_cache->detach_all();
+  }
+
+  Capability *cap = lock_cache->client_cap;
+  if (cap) {
+    int cap_bit = get_cap_bit_for_lock_cache(lock_cache->opcode);
+    cap->clear_lock_cache_allowed(cap_bit);
+    if (cap->issued() & cap_bit)
+      issue_caps(lock_cache->get_dir_inode(), cap);
+    else
+      cap = nullptr;
+  }
+
+  if (!cap) {
+    lock_cache->item_cap_lock_cache.remove_myself();
+    put_lock_cache(lock_cache);
+  }
+}
+
+void Locker::eval_lock_caches(Capability *cap)
+{
+  for (auto p = cap->lock_caches.begin(); !p.end(); ) {
+    MDLockCache *lock_cache = *p;
+    ++p;
+    if (!lock_cache->invalidating)
+      continue;
+    int cap_bit = get_cap_bit_for_lock_cache(lock_cache->opcode);
+    if (!(cap->issued() & cap_bit)) {
+      lock_cache->item_cap_lock_cache.remove_myself();
+      put_lock_cache(lock_cache);
+    }
+  }
 }
 
 // ask lock caches to release auth pins
@@ -850,6 +893,13 @@ void Locker::create_lock_cache(MDRequestRef& mdr, CInode *diri)
     return;
   }
 
+  for (auto p = cap->lock_caches.begin(); !p.end(); ++p) {
+    if ((*p)->opcode == opcode) {
+      dout(10) << " lock cache already exists for " << ceph_mds_op_name(opcode) << ", noop" << dendl;
+      return;
+    }
+  }
+
   set<CInode*> ancestors;
   for (CInode *in = diri; ; ) {
     CDentry *pdn = in->get_projected_parent_dn();
     if (!pdn)
       break;
@@ -905,6 +955,7 @@
   }
 
   auto lock_cache = new MDLockCache(cap, opcode);
+  cap->set_lock_cache_allowed(get_cap_bit_for_lock_cache(opcode));
 
   for (auto dir : dfv) {
     // prevent subtree migration
@@ -2234,6 +2285,11 @@ int Locker::issue_caps(CInode *in, Capability *only_cap)
 
     // add in any xlocker-only caps (for locks this client is the xlocker for)
     allowed |= xlocker_allowed & in->get_xlocker_mask(it->first);
+    if (in->is_dir()) {
+      allowed &= ~CEPH_CAP_ANY_DIR_OPS;
+      if (allowed & CEPH_CAP_FILE_EXCL)
+        allowed |= cap->get_lock_cache_allowed();
+    }
 
     if ((in->inode.inline_data.version != CEPH_INLINE_NONE &&
          cap->is_noinline()) ||
@@ -2411,7 +2467,9 @@ bool Locker::revoke_stale_caps(Session *session)
     int issued = cap->issued();
     CInode *in = cap->get_inode();
     dout(10) << " revoking " << ccap_string(issued) << " on " << *in << dendl;
-    cap->revoke();
+    int revoked = cap->revoke();
+    if (revoked & CEPH_CAP_ANY_DIR_OPS)
+      eval_lock_caches(cap);
 
     if (in->is_auth() &&
         in->inode.client_ranges.count(cap->get_client()))
@@ -2925,20 +2983,25 @@ void Locker::_do_null_snapflush(CInode *head_in, client_t client, snapid_t last)
 
 bool Locker::should_defer_client_cap_frozen(CInode *in)
 {
+  if (in->is_frozen())
+    return true;
+
   /*
-   * This policy needs to be AT LEAST as permissive as allowing a client request
-   * to go forward, or else a client request can release something, the release
-   * gets deferred, but the request gets processed and deadlocks because when the
-   * caps can't get revoked.
+   * This policy needs to be AT LEAST as permissive as allowing a client
+   * request to go forward, or else a client request can release something,
+   * the release gets deferred, but the request gets processed and deadlocks
+   * because the caps can't get revoked.
    *
-   * Currently, a request wait if anything locked is freezing (can't
-   * auth_pin), which would avoid any deadlock with cap release. Thus @in
-   * _MUST_ be in the lock/auth_pin set.
-   *
-   * auth_pins==0 implies no unstable lock and not auth pinnned by
-   * client request, otherwise continue even it's freezing.
+   * No auth_pin implies that there is no unstable lock and @in is not auth
+   * pinned by a client request. If the parent dirfrag is auth pinned by a lock
+   * cache, a later request from the lock cache owner may forcibly auth pin @in.
    */
-  return (in->is_freezing() && in->get_num_auth_pins() == 0) || in->is_frozen();
+  if (in->is_freezing() && in->get_num_auth_pins() == 0) {
+    CDir* dir = in->get_parent_dir();
+    if (!dir || !dir->is_auth_pinned_by_lock_cache())
+      return true;
+  }
+  return false;
 }
 
 void Locker::handle_client_caps(const cref_t<MClientCaps> &m)
@@ -3159,12 +3222,14 @@ void Locker::handle_client_caps(const cref_t<MClientCaps> &m)
       caps &= cap->issued();
     }
 
-    cap->confirm_receipt(m->get_seq(), caps);
+    int revoked = cap->confirm_receipt(m->get_seq(), caps);
     dout(10) << " follows " << follows
              << " retains " << ccap_string(m->get_caps())
              << " dirty " << ccap_string(dirty)
              << " on " << *in << dendl;
 
+    if (revoked & CEPH_CAP_ANY_DIR_OPS)
+      eval_lock_caches(cap);
 
     // missing/skipped snapflush?
     // The client MAY send a snapflush if it is issued WR/EXCL caps, but
@@ -3316,7 +3381,9 @@ void Locker::process_request_cap_release(MDRequestRef& mdr, client_t client, con
     dout(10) << " confirming not issued caps " << ccap_string(caps & ~cap->issued()) << dendl;
     caps &= cap->issued();
   }
-  cap->confirm_receipt(seq, caps);
+  int revoked = cap->confirm_receipt(seq, caps);
+  if (revoked & CEPH_CAP_ANY_DIR_OPS)
+    eval_lock_caches(cap);
 
   if (!in->client_need_snapflush.empty() &&
       (cap->issued() & CEPH_CAP_ANY_FILE_WR) == 0) {
@@ -3848,6 +3915,7 @@ void Locker::remove_client_cap(CInode *in, Capability *cap, bool kill)
 
   while (!cap->lock_caches.empty()) {
     MDLockCache* lock_cache = cap->lock_caches.front();
+    lock_cache->client_cap = nullptr;
     invalidate_lock_cache(lock_cache);
   }
 
diff --git a/src/mds/Locker.h b/src/mds/Locker.h
index be4f3bf54c9..97dc68ede7f 100644
--- a/src/mds/Locker.h
+++ b/src/mds/Locker.h
@@ -67,11 +67,13 @@ public:
   void drop_rdlocks_for_early_reply(MutationImpl *mut);
   void drop_locks_for_fragment_unfreeze(MutationImpl *mut);
 
+  int get_cap_bit_for_lock_cache(int op);
   void create_lock_cache(MDRequestRef& mdr, CInode *diri);
   bool find_and_attach_lock_cache(MDRequestRef& mdr, CInode *diri);
   void invalidate_lock_caches(CDir *dir);
   void invalidate_lock_caches(SimpleLock *lock);
   void invalidate_lock_cache(MDLockCache *lock_cache);
+  void eval_lock_caches(Capability *cap);
   void put_lock_cache(MDLockCache* lock_cache);
   void eval_gather(SimpleLock *lock, bool first=false, bool *need_issue=0,
                    MDSContext::vec *pfinishers=0);
-- 
2.39.5
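
Illustrative sketch (not part of the patch): the client-side counterpart of
this delegation is expected to mirror Locker::get_cap_bit_for_lock_cache():
each async dir operation maps to one of the new cap bits, and the client may
only take the async path while that bit is issued on the parent directory.
A minimal sketch of that check, assuming the caller passes in whatever caps
it currently holds on the directory; the helper names below are hypothetical
and are not part of the Ceph client API:

  // Sketch only. cap_bit_for_dir_op() and may_do_async_dir_op() are
  // hypothetical helpers for illustration; only the CEPH_* constants come
  // from ceph_fs.h (including the bits added by this patch).
  #include "include/ceph_fs.h"

  static int cap_bit_for_dir_op(int op)
  {
    switch (op) {
    case CEPH_MDS_OP_CREATE:
      return CEPH_CAP_DIR_CREATE;   // delegated via Fc on the directory
    case CEPH_MDS_OP_UNLINK:
      return CEPH_CAP_DIR_UNLINK;   // delegated via Fr on the directory
    default:
      return 0;                     // other operations never go async
    }
  }

  static bool may_do_async_dir_op(int dir_caps_issued, int op)
  {
    int bit = cap_bit_for_dir_op(op);
    // Use the delegated lock cache only while the corresponding cap bit is
    // still issued; once the MDS revokes it, fall back to a synchronous
    // request so the MDS can invalidate the lock cache.
    return bit && (dir_caps_issued & bit) == bit;
  }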