mds: try to open the ino if it couldn't be found in the MDCache of any peer

author Xiubo Li <xiubli@redhat.com>

Fri, 1 Apr 2022 14:15:39 +0000 (22:15 +0800)

committer Xiubo Li <xiubli@redhat.com>

Wed, 8 Jun 2022 01:54:30 +0000 (09:54 +0800)
author Xiubo Li <xiubli@redhat.com>
Fri, 1 Apr 2022 14:15:39 +0000 (22:15 +0800)
committer Xiubo Li <xiubli@redhat.com>
Wed, 8 Jun 2022 01:54:30 +0000 (09:54 +0800)
diff --git a/src/mds/Server.cc b/src/mds/Server.cc

index 7edcdc5f8789c06ba9757ed0a3fd1a55a16f72d6..62cbaf624c8a51e0ab892e31e7a734ec1907c1d5 100644 (file)
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3423,15 +3423,68 @@ void Server::apply_allocated_inos(MDRequestRef& mdr, Session *session)
    }
  }
  
+struct C_MDS_TryOpenInode : public ServerContext {
+  MDRequestRef mdr;
+  inodeno_t ino;
+  C_MDS_TryOpenInode(Server *s, MDRequestRef& r, inodeno_t i) :
+    ServerContext(s), mdr(r), ino(i) {}
+  void finish(int r) override {
+    server->_try_open_ino(mdr, r, ino);
+  }
+};
+
+void Server::_try_open_ino(MDRequestRef& mdr, int r, inodeno_t ino)
+{
+  dout(10) << "_try_open_ino " << mdr.get() << " ino " << ino << " r=" << r << dendl;
+
+  // `r` is a rank if >=0, else an error code
+  if (r >= 0) {
+    mds_rank_t dest_rank(r);
+    if (dest_rank == mds->get_nodeid())
+      dispatch_client_request(mdr);
+    else
+      mdcache->request_forward(mdr, dest_rank);
+    return;
+  }
+
+  // give up
+  if (r == -CEPHFS_ENOENT || r == -CEPHFS_ENODATA)
+    r = -CEPHFS_ESTALE;
+  respond_to_request(mdr, r);
+}
+
  class C_MDS_TryFindInode : public ServerContext {
    MDRequestRef mdr;
+  MDCache *mdcache;
+  inodeno_t ino;
  public:
-  C_MDS_TryFindInode(Server *s, MDRequestRef& r) : ServerContext(s), mdr(r) {}
+  C_MDS_TryFindInode(Server *s, MDRequestRef& r, MDCache *m, inodeno_t i) :
+    ServerContext(s), mdr(r), mdcache(m), ino(i) {}
    void finish(int r) override {
-    if (r == -CEPHFS_ESTALE) // :( find_ino_peers failed
-      server->respond_to_request(mdr, r);
-    else
+    if (r == -CEPHFS_ESTALE) { // :( find_ino_peers failed
+      /*
+       * There is one case where the MDS crashes before the
+       * openfiletable journal could be flushed, so the
+       * replacement MDS may not load some already-opened
+       * CInodes into the MDCache. If the clients then retry
+       * some requests after reconnecting, the MDS will
+       * return -ESTALE after failing to find the ino in
+       * all active peers.
+       *
+       * As a workaround users can run `ls -R ${mountpoint}`
+       * to list all the sub-files or sub-directories from the
+       * mountpoint.
+       *
+       * We need to try to open the ino and then retry the request.
+       */
+      CInode *in = mdcache->get_inode(ino);
+      if (in && in->state_test(CInode::STATE_PURGING))
+        server->respond_to_request(mdr, r);
+      else
+        mdcache->open_ino(ino, (int64_t)-1, new C_MDS_TryOpenInode(server, mdr, ino));
+    } else {
        server->dispatch_client_request(mdr);
+    }
    }
  };
  
@@ -3471,8 +3524,8 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr,
        respond_to_request(mdr, r);
      } else if (r == -CEPHFS_ESTALE) {
        dout(10) << "FAIL on CEPHFS_ESTALE but attempting recovery" << dendl;
-      MDSContext *c = new C_MDS_TryFindInode(this, mdr);
-      mdcache->find_ino_peers(refpath.get_ino(), c);
+      inodeno_t ino = refpath.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
      } else {
        dout(10) << "FAIL on error " << r << dendl;
        respond_to_request(mdr, r);
@@ -3557,7 +3610,8 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequestRef& mdr,
    if (r < 0) {
      if (r == -CEPHFS_ESTALE) {
        dout(10) << "FAIL on CEPHFS_ESTALE but attempting recovery" << dendl;
-      mdcache->find_ino_peers(refpath.get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = refpath.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
        return nullptr;
      }
      respond_to_request(mdr, r);
@@ -3642,7 +3696,8 @@ Server::rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn)
    if (r != 0) {
      if (r == -CEPHFS_ESTALE) {
        dout(10) << "CEPHFS_ESTALE on path, attempting recovery" << dendl;
-      mdcache->find_ino_peers(refpath.get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = refpath.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
      } else if (r < 0) {
        respond_to_request(mdr, r);
      }
@@ -3654,7 +3709,8 @@ Server::rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn)
    if (r != 0) {
      if (r == -CEPHFS_ESTALE) {
        dout(10) << "CEPHFS_ESTALE on path2, attempting recovery" << dendl;
-      mdcache->find_ino_peers(refpath2.get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = refpath2.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
      } else if (r < 0) {
        respond_to_request(mdr, r);
      }
@@ -6873,7 +6929,8 @@ void Server::handle_client_link(MDRequestRef& mdr)
      targeti = mdcache->get_inode(req->get_filepath2().get_ino());
      if (!targeti) {
        dout(10) << "CEPHFS_ESTALE on path2, attempting recovery" << dendl;
-      mdcache->find_ino_peers(req->get_filepath2().get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = req->get_filepath2().get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
        return;
      }
      mdr->pin(targeti);
diff --git a/src/mds/Server.h b/src/mds/Server.h

index 3d69a536ad6fb73548c4bf5abc5759bd9a07505d..8729a45c0f880ea0a603f092a3c7a36a0ce3de44 100644 (file)
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -187,6 +187,7 @@ public:
    void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
    void apply_allocated_inos(MDRequestRef& mdr, Session *session);
  
+  void _try_open_ino(MDRequestRef& mdr, int r, inodeno_t ino);
    CInode* rdlock_path_pin_ref(MDRequestRef& mdr, bool want_auth,
                               bool no_want_auth=false);
    CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, bool create,
author	Xiubo Li <xiubli@redhat.com>
	Fri, 1 Apr 2022 14:15:39 +0000 (22:15 +0800)
committer	Xiubo Li <xiubli@redhat.com>
	Wed, 8 Jun 2022 01:54:30 +0000 (09:54 +0800)
src/mds/Server.cc		patch \| blob \| history
src/mds/Server.h		patch \| blob \| history