From: Leonid Usov Date: Sat, 11 May 2024 14:00:21 +0000 (+0300) Subject: squid: mds: enhance the `lock path` asok command X-Git-Tag: v19.1.1~299^2~16 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f18724fb99e01337547cf068b0ed8da2953bef54;p=ceph.git squid: mds: enhance the `lock path` asok command * when the quiesce lock is taken by this op, don't consider the inode `quiesced` * drop all locks taken during traversal * drop all local authpins after the locks are taken * add --await functionality that will block the command until locks are taken or an error is encountered * return the RC that represents the operation result. 0 if the operation was scheduled and hasn't failed so far * add authpin control flags ** --ap-freeze - to auth_pin_freeze the target inode ** --ap-dont-block - to pass auth_pin_nonblocking when acquiring the target inode locks Signed-off-by: Leonid Usov (cherry picked from commit 3552fc5a9ea17c173a18be41fa15fbbae8d77edf) Fixes: https://tracker.ceph.com/issues/66154 --- diff --git a/qa/tasks/cephfs/test_quiesce.py b/qa/tasks/cephfs/test_quiesce.py index d66f67a18641f..f8bcacfb6db03 100644 --- a/qa/tasks/cephfs/test_quiesce.py +++ b/qa/tasks/cephfs/test_quiesce.py @@ -830,7 +830,7 @@ class TestQuiesceMultiRank(QuiesceTestCase): op = self.fs.rank_tell("lock", "path", self.mntpnt+"/dir1/dir2", "policy:r", rank=1) p = self.mount_a.setfattr("dir1/dir2", "ceph.quiesce.block", "1", wait=False) sleep(2) # for req to block waiting for xlock on policylock - reqid = self._reqid_tostr(op['op']['reqid']) + reqid = self._reqid_tostr(op['reqid']) self.fs.kill_op(reqid, rank=1) p.wait() diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index b588e017abfb0..8aa674186c0a8 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -5597,6 +5597,18 @@ void CInode::get_subtree_dirfrags(std::vector& v) const } } +bool CInode::is_quiesced() const { + if (!quiescelock.is_xlocked()) { + return false; + } + // check that it's the quiesce op that's holding the lock + auto mut = quiescelock.get_xlock_by(); + ceph_assert(mut); /* that would be weird */ + auto* mdr = dynamic_cast(mut.get()); + ceph_assert(mdr); /* also would be weird */ + return mdr->internal_op == CEPH_MDS_OP_QUIESCE_INODE; +} + bool CInode::will_block_for_quiesce(const MDRequestRef& mdr) { if (mdr && mdr->is_wrlocked(&quiescelock)) { return false; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index d3478389e68a7..18b198d7d2634 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -662,7 +662,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counteris_file(); } bool is_symlink() const { return get_inode()->is_symlink(); } bool is_dir() const { return get_inode()->is_dir(); } - bool is_quiesced() const { return quiescelock.is_xlocked(); } + bool is_quiesced() const; bool will_block_for_quiesce(const MDRequestRef& mdr = MDRequestRef {}); bool is_head() const { return last == CEPH_NOSNAP; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index f715426802fa9..008d796c84278 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -118,7 +118,8 @@ public: }; struct LockPathState { - std::vector locks; + const MDCache::LockPathConfig config; + CInode* in = nullptr; }; struct QuiesceInodeState { @@ -13990,25 +13991,36 @@ MDRequestRef MDCache::quiesce_path(filepath p, C_MDS_QuiescePath* c, Formatter * void MDCache::dispatch_lock_path(const MDRequestRef& mdr) { - CInode* in = nullptr; CF_MDS_RetryRequestFactory cf(this, mdr, true); - static const int ptflags = 0 - | MDS_TRAVERSE_DISCOVER - | MDS_TRAVERSE_RDLOCK_PATH - | MDS_TRAVERSE_WANT_INODE - ; - int r = path_traverse(mdr, cf, mdr->get_filepath(), ptflags, nullptr, &in); - if (r > 0) - return; - if (r < 0) { - mds->server->respond_to_request(mdr, r); - return; + auto& lps = *static_cast(mdr->internal_op_private); + CInode* in = lps.in; + + if (!in) { + static const int ptflags = 0 + | MDS_TRAVERSE_DISCOVER + | MDS_TRAVERSE_RDLOCK_PATH + | MDS_TRAVERSE_WANT_INODE + ; + + // TODO: honor `lps.dont_block` in the path traverse? + int r = path_traverse(mdr, cf, mdr->get_filepath(), ptflags, nullptr, &in); + if (r > 0) + return; + if (r < 0) { + mds->server->respond_to_request(mdr, r); + return; + } + lps.in = in; } - auto& lps = *static_cast(mdr->internal_op_private); + // since we have our inode, let's drop all locks from the traversal, + // because ideally, we only want to hold the locks from the command + mds->locker->drop_locks(mdr.get()); + + mdr->mark_event("acquired target inode"); MutationImpl::LockOpVec lov; - for (const auto &lock : lps.locks) { + for (const auto &lock : lps.config.locks) { auto colonps = lock.find(':'); if (colonps == std::string::npos) { mds->server->respond_to_request(mdr, -CEPHFS_EINVAL); @@ -14065,19 +14077,36 @@ void MDCache::dispatch_lock_path(const MDRequestRef& mdr) } } - if (!mds->locker->acquire_locks(mdr, lov, nullptr, {in}, false, true)) { + if (!mds->locker->acquire_locks(mdr, lov, lps.config.ap_freeze ? in : nullptr, {in}, lps.config.ap_dont_block, true)) { + if (lps.config.ap_dont_block && mdr->aborted) { + mds->server->respond_to_request(mdr, -CEPHFS_EWOULDBLOCK); + } return; } + if (!lps.config.ap_freeze) { + // go stealth + mdr->drop_local_auth_pins(); + } + + mdr->mark_event("object locked"); + if (auto c = mdr->internal_op_finish) { + mdr->internal_op_finish = nullptr; + c->complete(0); + } /* deliberately leak until killed */ } -MDRequestRef MDCache::lock_path(filepath p, std::vector locks) +MDRequestRef MDCache::lock_path(LockPathConfig config, std::function on_locked) { MDRequestRef mdr = request_start_internal(CEPH_MDS_OP_LOCK_PATH); - mdr->set_filepath(p); - mdr->internal_op_finish = new LambdaContext([](int r) {}); - mdr->internal_op_private = new LockPathState{locks}; + mdr->set_filepath(config.fpath); + if (on_locked) { + mdr->internal_op_finish = new LambdaContext([mdr, cb = std::move(on_locked)](int rc) { + cb(mdr); + }); + } + mdr->internal_op_private = new LockPathState{std::move(config)}; dispatch_request(mdr); return mdr; } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index fdee140a2cd25..8be5f2fde5ff5 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -627,7 +627,14 @@ private: } void add_quiesce(CInode* parent, CInode* in); - MDRequestRef lock_path(filepath p, std::vector locks); + struct LockPathConfig { + filepath fpath; + std::vector locks; + bool ap_dont_block = false; + bool ap_freeze = false; + }; + + MDRequestRef lock_path(LockPathConfig config, std::function on_locked = {}); void clean_open_file_lists(); void dump_openfiles(Formatter *f); diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index 1ea641cf3ba78..9f39fd8f6fa4a 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -363,6 +363,9 @@ void MDSDaemon::set_up_admin_socket() r = admin_socket->register_command("lock path" " name=path,type=CephString,req=true" " name=locks,type=CephString,n=N,req=false" + " name=ap_dont_block,type=CephBool,req=false" + " name=ap_freeze,type=CephBool,req=false" + " name=await,type=CephBool,req=false" ,asok_hook ,"lock a path"); ceph_assert(r == 0); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 94a48ae913bed..73e48889b584e 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -58,6 +58,7 @@ using std::set; using std::string; using std::vector; using TOPNSPC::common::cmd_getval; +using TOPNSPC::common::cmd_getval_or; class C_Flush_Journal : public MDSInternalContext { public: @@ -3020,7 +3021,8 @@ void MDSRankDispatcher::handle_asok_command( } else if (command == "quiesce path") { r = command_quiesce_path(f, cmdmap, *css); } else if (command == "lock path") { - r = command_lock_path(f, cmdmap, *css); + command_lock_path(f, cmdmap, std::move(on_finish)); + return; } else if (command == "dump tree") { command_dump_tree(cmdmap, *css, f); } else if (command == "dump loads") { @@ -3509,28 +3511,48 @@ int MDSRank::command_quiesce_path(Formatter* f, const cmdmap_t& cmdmap, std::ost return 0; } -int MDSRank::command_lock_path(Formatter* f, const cmdmap_t& cmdmap, std::ostream& ss) +void MDSRank::command_lock_path(Formatter* f, const cmdmap_t& cmdmap, std::function on_finish) { std::string path; - { - bool got = cmd_getval(cmdmap, "path", path); - if (!got) { - ss << "missing path"; - return -CEPHFS_EINVAL; - } + + if (!cmd_getval(cmdmap, "path", path)) { + bufferlist bl; + on_finish(-EINVAL, "missing path", bl); + return; } - std::vector locks; - cmd_getval(cmdmap, "locks", locks); + MDCache::LockPathConfig config; + + cmd_getval(cmdmap, "locks", config.locks); + config.ap_dont_block = cmd_getval_or(cmdmap, "ap_dont_block", false); + config.ap_freeze = cmd_getval_or(cmdmap, "ap_freeze", false); + config.fpath = filepath(path); + bool await = cmd_getval_or(cmdmap, "await", false); + + auto respond = [f, on_finish=std::move(on_finish)](MDRequestRef const& req) { + f->open_object_section("response"); + req->dump_with_mds_lock(f); + f->close_section(); + + bufferlist bl; + // need to do this manually, because the default asock + // on_finish handler doesn't flush the formatter for rc < 0 + f->flush(bl); + auto cephrc = req->result.value_or(0); // SUCCESS makes more sense for this API than EINPROGRESS + auto rc = cephrc < 0 ? -ceph_to_hostos_errno(-cephrc) : cephrc; + on_finish(rc, "", bl); + }; - f->open_object_section("lock"); { std::lock_guard l(mds_lock); - auto mdr = mdcache->lock_path(filepath(path), locks); - f->dump_object("op", *mdr); + if (await) { + mdcache->lock_path(std::move(config), std::move(respond)); + } else { + auto mdr = mdcache->lock_path(std::move(config)); + // respond at once + respond(mdr); + } } - f->close_section(); - return 0; } void MDSRank::command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss) diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 02b9466f43942..128050ebe3493 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -527,7 +527,7 @@ class MDSRank { void command_openfiles_ls(Formatter *f); void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f); int command_quiesce_path(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); - int command_lock_path(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); + void command_lock_path(Formatter* f, const cmdmap_t& cmdmap, std::function on_finish); void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); void command_dump_dir(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss); void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish); diff --git a/src/mds/Mutation.cc b/src/mds/Mutation.cc index b7769319cb1cf..5ebea369fd70e 100644 --- a/src/mds/Mutation.cc +++ b/src/mds/Mutation.cc @@ -478,7 +478,11 @@ void MDRequestImpl::print(ostream &out) const void MDRequestImpl::_dump(Formatter *f, bool has_mds_lock) const { std::lock_guard l(lock); - f->dump_int("result", result); + if (result) { + f->dump_int("result", *result); + } else { + f->dump_null("result"); + } f->dump_string("flag_point", _get_state_string()); f->dump_object("reqid", reqid); if (client_request) { diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index e1b905b33d8cb..19cf624b12633 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -15,7 +15,7 @@ #ifndef CEPH_MDS_MUTATION_H #define CEPH_MDS_MUTATION_H -#include +#include #include "include/interval_set.h" #include "include/elist.h" @@ -242,7 +242,7 @@ public: void _dump_op_descriptor(std::ostream& stream) const override; metareqid_t reqid; - int result = std::numeric_limits::min(); + std::optional result; __u32 attempt = 0; // which attempt for this request LogSegment *ls = nullptr; // the log segment i'm committing to