]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
squid: mds: enhance the `lock path` asok command
authorLeonid Usov <leonid.usov@ibm.com>
Sat, 11 May 2024 14:00:21 +0000 (17:00 +0300)
committerLeonid Usov <leonid.usov@ibm.com>
Tue, 28 May 2024 16:06:19 +0000 (19:06 +0300)
* when the quiesce lock is taken by this op, don't consider the inode `quiesced`
* drop all locks taken during traversal
* drop all local authpins after the locks are taken
* add --await functionality that will block the command until locks are taken or an error is encountered
* return the RC that represents the operation result. 0 if the operation was scheduled and hasn't failed so far
* add authpin control flags
** --ap-freeze - to auth_pin_freeze the target inode
** --ap-dont-block - to pass auth_pin_nonblocking when acquiring the target inode locks

Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
(cherry picked from commit 3552fc5a9ea17c173a18be41fa15fbbae8d77edf)
Fixes: https://tracker.ceph.com/issues/66154
qa/tasks/cephfs/test_quiesce.py
src/mds/CInode.cc
src/mds/CInode.h
src/mds/MDCache.cc
src/mds/MDCache.h
src/mds/MDSDaemon.cc
src/mds/MDSRank.cc
src/mds/MDSRank.h
src/mds/Mutation.cc
src/mds/Mutation.h

index d66f67a18641f52850df4d54abe097ea82056c9f..f8bcacfb6db0327ba836f348b0d3a6786b91dd4f 100644 (file)
@@ -830,7 +830,7 @@ class TestQuiesceMultiRank(QuiesceTestCase):
         op = self.fs.rank_tell("lock", "path", self.mntpnt+"/dir1/dir2", "policy:r", rank=1)
         p = self.mount_a.setfattr("dir1/dir2", "ceph.quiesce.block", "1", wait=False)
         sleep(2) # for req to block waiting for xlock on policylock
-        reqid = self._reqid_tostr(op['op']['reqid'])
+        reqid = self._reqid_tostr(op['reqid'])
         self.fs.kill_op(reqid, rank=1)
         p.wait()
 
index b588e017abfb0ea13c793a6b57f627e6106d61d8..8aa674186c0a813c48ab39800cef8887e6b4a6e5 100644 (file)
@@ -5597,6 +5597,18 @@ void CInode::get_subtree_dirfrags(std::vector<CDir*>& v) const
   }
 }
 
+bool CInode::is_quiesced() const { 
+  if (!quiescelock.is_xlocked()) {
+    return false;
+  }
+  // check that it's the quiesce op that's holding the lock
+  auto mut = quiescelock.get_xlock_by();
+  ceph_assert(mut); /* that would be weird */
+  auto* mdr = dynamic_cast<MDRequestImpl*>(mut.get());
+  ceph_assert(mdr); /* also would be weird */
+  return mdr->internal_op == CEPH_MDS_OP_QUIESCE_INODE;
+}
+
 bool CInode::will_block_for_quiesce(const MDRequestRef& mdr) {
   if (mdr && mdr->is_wrlocked(&quiescelock)) {
     return false;
index d3478389e68a7df3d746f0cd34f9332bf6ceff28..18b198d7d263477617afae51f49c9522d22894bf 100644 (file)
@@ -662,7 +662,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
   bool is_file() const    { return get_inode()->is_file(); }
   bool is_symlink() const { return get_inode()->is_symlink(); }
   bool is_dir() const     { return get_inode()->is_dir(); }
-  bool is_quiesced() const { return quiescelock.is_xlocked(); }
+  bool is_quiesced() const;
   bool will_block_for_quiesce(const MDRequestRef& mdr = MDRequestRef {});
 
   bool is_head() const { return last == CEPH_NOSNAP; }
index f715426802fa9e7db6ae2dab4155cfc32656768a..008d796c8427818e7ff3ff5edc95aac078e99f18 100644 (file)
@@ -118,7 +118,8 @@ public:
 };
 
 struct LockPathState {
-  std::vector<std::string> locks;
+  const MDCache::LockPathConfig config;
+  CInode* in = nullptr;
 };
 
 struct QuiesceInodeState {
@@ -13990,25 +13991,36 @@ MDRequestRef MDCache::quiesce_path(filepath p, C_MDS_QuiescePath* c, Formatter *
 
 void MDCache::dispatch_lock_path(const MDRequestRef& mdr)
 {
-  CInode* in = nullptr;
   CF_MDS_RetryRequestFactory cf(this, mdr, true);
-  static const int ptflags = 0
-    | MDS_TRAVERSE_DISCOVER
-    | MDS_TRAVERSE_RDLOCK_PATH
-    | MDS_TRAVERSE_WANT_INODE
-    ;
-  int r = path_traverse(mdr, cf, mdr->get_filepath(), ptflags, nullptr, &in);
-  if (r > 0)
-    return;
-  if (r < 0) {
-    mds->server->respond_to_request(mdr, r);
-    return;
+  auto& lps = *static_cast<LockPathState*>(mdr->internal_op_private);
+  CInode* in = lps.in;
+
+  if (!in) {
+    static const int ptflags = 0
+      | MDS_TRAVERSE_DISCOVER
+      | MDS_TRAVERSE_RDLOCK_PATH
+      | MDS_TRAVERSE_WANT_INODE
+      ;
+    
+    // TODO: honor `lps.dont_block` in the path traverse?
+    int r = path_traverse(mdr, cf, mdr->get_filepath(), ptflags, nullptr, &in);
+    if (r > 0)
+      return;
+    if (r < 0) {
+      mds->server->respond_to_request(mdr, r);
+      return;
+    }
+    lps.in = in;
   }
 
-  auto& lps = *static_cast<LockPathState*>(mdr->internal_op_private);
+  // since we have our inode, let's drop all locks from the traversal,
+  // because ideally, we only want to hold the locks from the command
+  mds->locker->drop_locks(mdr.get());
+
+  mdr->mark_event("acquired target inode");
 
   MutationImpl::LockOpVec lov;
-  for (const auto &lock : lps.locks) {
+  for (const auto &lock : lps.config.locks) {
     auto colonps = lock.find(':');
     if (colonps == std::string::npos) {
       mds->server->respond_to_request(mdr, -CEPHFS_EINVAL);
@@ -14065,19 +14077,36 @@ void MDCache::dispatch_lock_path(const MDRequestRef& mdr)
     }
   }
 
-  if (!mds->locker->acquire_locks(mdr, lov, nullptr, {in}, false, true)) {
+  if (!mds->locker->acquire_locks(mdr, lov, lps.config.ap_freeze ? in : nullptr, {in}, lps.config.ap_dont_block, true)) {
+    if (lps.config.ap_dont_block && mdr->aborted) {
+      mds->server->respond_to_request(mdr, -CEPHFS_EWOULDBLOCK);
+    }
     return;
   }
 
+  if (!lps.config.ap_freeze) {
+    // go stealth
+    mdr->drop_local_auth_pins();
+  }
+
+  mdr->mark_event("object locked");
+  if (auto c = mdr->internal_op_finish) {
+    mdr->internal_op_finish = nullptr;
+    c->complete(0);
+  }
   /* deliberately leak until killed */
 }
 
-MDRequestRef MDCache::lock_path(filepath p, std::vector<std::string> locks)
+MDRequestRef MDCache::lock_path(LockPathConfig config, std::function<void(MDRequestRef const& mdr)> on_locked)
 {
   MDRequestRef mdr = request_start_internal(CEPH_MDS_OP_LOCK_PATH);
-  mdr->set_filepath(p);
-  mdr->internal_op_finish = new LambdaContext([](int r) {});
-  mdr->internal_op_private = new LockPathState{locks};
+  mdr->set_filepath(config.fpath);
+  if (on_locked) {
+    mdr->internal_op_finish = new LambdaContext([mdr, cb = std::move(on_locked)](int rc) {
+      cb(mdr);
+    });
+  }
+  mdr->internal_op_private = new LockPathState{std::move(config)};
   dispatch_request(mdr);
   return mdr;
 }
index fdee140a2cd25aa7371573032971e3b9feadd873..8be5f2fde5ff53dbbb2c534f9558e310cd1c6a7d 100644 (file)
@@ -627,7 +627,14 @@ private:
   }
   void add_quiesce(CInode* parent, CInode* in);
 
-  MDRequestRef lock_path(filepath p, std::vector<std::string> locks);
+  struct LockPathConfig {
+    filepath fpath;
+    std::vector<std::string> locks;
+    bool ap_dont_block = false;
+    bool ap_freeze = false;
+  };
+
+  MDRequestRef lock_path(LockPathConfig config, std::function<void(MDRequestRef const& mdr)> on_locked = {});
 
   void clean_open_file_lists();
   void dump_openfiles(Formatter *f);
index 1ea641cf3ba7852ce8586823364a8dcf6c359198..9f39fd8f6fa4acbe31ec407dca10b09113eca903 100644 (file)
@@ -363,6 +363,9 @@ void MDSDaemon::set_up_admin_socket()
   r = admin_socket->register_command("lock path"
                                      " name=path,type=CephString,req=true"
                                      " name=locks,type=CephString,n=N,req=false"
+                                     " name=ap_dont_block,type=CephBool,req=false"
+                                     " name=ap_freeze,type=CephBool,req=false"
+                                     " name=await,type=CephBool,req=false"
                                     ,asok_hook
                                     ,"lock a path");
   ceph_assert(r == 0);
index 94a48ae913bed7ce4e51734e2bb368d4eee46cbe..73e48889b584ed08dc7fac4f817d8d04c6cf9f2c 100644 (file)
@@ -58,6 +58,7 @@ using std::set;
 using std::string;
 using std::vector;
 using TOPNSPC::common::cmd_getval;
+using TOPNSPC::common::cmd_getval_or;
 
 class C_Flush_Journal : public MDSInternalContext {
 public:
@@ -3020,7 +3021,8 @@ void MDSRankDispatcher::handle_asok_command(
   } else if (command == "quiesce path") {
     r = command_quiesce_path(f, cmdmap, *css);
   } else if (command == "lock path") {
-    r = command_lock_path(f, cmdmap, *css);
+    command_lock_path(f, cmdmap, std::move(on_finish));
+    return;
   } else if (command == "dump tree") {
     command_dump_tree(cmdmap, *css, f);
   } else if (command == "dump loads") {
@@ -3509,28 +3511,48 @@ int MDSRank::command_quiesce_path(Formatter* f, const cmdmap_t& cmdmap, std::ost
   return 0;
 }
 
-int MDSRank::command_lock_path(Formatter* f, const cmdmap_t& cmdmap, std::ostream& ss)
+void MDSRank::command_lock_path(Formatter* f, const cmdmap_t& cmdmap, std::function<void(int, const std::string&, bufferlist&)> on_finish)
 {
   std::string path;
-  {
-    bool got = cmd_getval(cmdmap, "path", path);
-    if (!got) {
-      ss << "missing path";
-      return -CEPHFS_EINVAL;
-    }
+
+  if (!cmd_getval(cmdmap, "path", path)) {
+    bufferlist bl;
+    on_finish(-EINVAL, "missing path", bl);
+    return;
   }
 
-  std::vector<std::string> locks;
-  cmd_getval(cmdmap, "locks", locks);
+  MDCache::LockPathConfig config;
+
+  cmd_getval(cmdmap, "locks", config.locks);
+  config.ap_dont_block = cmd_getval_or<bool>(cmdmap, "ap_dont_block", false);
+  config.ap_freeze = cmd_getval_or<bool>(cmdmap, "ap_freeze", false);
+  config.fpath = filepath(path);
+  bool await = cmd_getval_or<bool>(cmdmap, "await", false);
+
+  auto respond = [f, on_finish=std::move(on_finish)](MDRequestRef const& req) {
+    f->open_object_section("response");
+    req->dump_with_mds_lock(f);
+    f->close_section();
+
+    bufferlist bl;
+    // need to do this manually, because the default asock
+    // on_finish handler doesn't flush the formatter for rc < 0
+    f->flush(bl);
+    auto cephrc = req->result.value_or(0); // SUCCESS makes more sense for this API than EINPROGRESS
+    auto rc = cephrc < 0 ? -ceph_to_hostos_errno(-cephrc) : cephrc;
+    on_finish(rc, "", bl);
+  };
 
-  f->open_object_section("lock");
   {
     std::lock_guard l(mds_lock);
-    auto mdr = mdcache->lock_path(filepath(path), locks);
-    f->dump_object("op", *mdr);
+    if (await) {
+      mdcache->lock_path(std::move(config), std::move(respond));
+    } else {
+      auto mdr = mdcache->lock_path(std::move(config));
+      // respond at once
+      respond(mdr);
+    }
   }
-  f->close_section();
-  return 0;
 }
 
 void MDSRank::command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss)
index 02b9466f43942edbd386653da7cb4e22b0293dcf..128050ebe3493014ee88cbcb796e484dafdec65f 100644 (file)
@@ -527,7 +527,7 @@ class MDSRank {
     void command_openfiles_ls(Formatter *f);
     void command_dump_tree(const cmdmap_t &cmdmap, std::ostream &ss, Formatter *f);
     int command_quiesce_path(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
-    int command_lock_path(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
+    void command_lock_path(Formatter* f, const cmdmap_t& cmdmap, std::function<void(int, const std::string&, bufferlist&)> on_finish);
     void command_dump_inode(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
     void command_dump_dir(Formatter *f, const cmdmap_t &cmdmap, std::ostream &ss);
     void command_cache_drop(uint64_t timeout, Formatter *f, Context *on_finish);
index b7769319cb1cf7c233a0e24f9dc32f3d5858643c..5ebea369fd70eed71b3bb1723003f008c5b182bf 100644 (file)
@@ -478,7 +478,11 @@ void MDRequestImpl::print(ostream &out) const
 void MDRequestImpl::_dump(Formatter *f, bool has_mds_lock) const
 {
   std::lock_guard l(lock);
-  f->dump_int("result", result);
+  if (result) {
+    f->dump_int("result", *result);
+  } else {
+    f->dump_null("result");
+  }
   f->dump_string("flag_point", _get_state_string());
   f->dump_object("reqid", reqid);
   if (client_request) {
index e1b905b33d8cb62ee77fe22ce16b233bcb8110c6..19cf624b12633b11ceaa3b2db137b05f57b88c06 100644 (file)
@@ -15,7 +15,7 @@
 #ifndef CEPH_MDS_MUTATION_H
 #define CEPH_MDS_MUTATION_H
 
-#include <limits>
+#include <optional>
 
 #include "include/interval_set.h"
 #include "include/elist.h"
@@ -242,7 +242,7 @@ public:
   void _dump_op_descriptor(std::ostream& stream) const override;
 
   metareqid_t reqid;
-  int result = std::numeric_limits<int>::min();
+  std::optional<int> result;
   __u32 attempt = 0;      // which attempt for this request
   LogSegment *ls = nullptr;  // the log segment i'm committing to