From: Xiubo Li
Date: Fri, 1 Apr 2022 14:15:39 +0000 (+0800)
Subject: mds: try to open the ino if it can't be found in the MDCache of any peer
X-Git-Tag: v16.2.11~479^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=22b3d293df7408220b80df873abae4638d986495;p=ceph.git

mds: try to open the ino if it can't be found in the MDCache of any peer

There is a case where, if the MDS crashes and the openfiletable journal
couldn't be flushed, the replacing MDS may not load some already opened
CInodes into its MDCache. When clients retry requests after
reconnecting, the MDS then returns -ESTALE after failing to find the
ino on any active peer.

As a workaround, users can run `ls -R ${mountpoint}` to list all the
sub-files and sub-directories under the mountpoint.

Instead, try to open the ino and retry the request.

Fixes: https://tracker.ceph.com/issues/53504
Signed-off-by: Xiubo Li
(cherry picked from commit c6601bc53ee8f69c6c919b122fea9dab72fc157f)
---

diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 7edcdc5f878..62cbaf624c8 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3423,15 +3423,68 @@ void Server::apply_allocated_inos(MDRequestRef& mdr, Session *session)
   }
 }
 
+struct C_MDS_TryOpenInode : public ServerContext {
+  MDRequestRef mdr;
+  inodeno_t ino;
+  C_MDS_TryOpenInode(Server *s, MDRequestRef& r, inodeno_t i) :
+    ServerContext(s), mdr(r), ino(i) {}
+  void finish(int r) override {
+    server->_try_open_ino(mdr, r, ino);
+  }
+};
+
+void Server::_try_open_ino(MDRequestRef& mdr, int r, inodeno_t ino)
+{
+  dout(10) << "_try_open_ino " << mdr.get() << " ino " << ino << " r=" << r << dendl;
+
+  // `r` is a rank if >=0, else an error code
+  if (r >= 0) {
+    mds_rank_t dest_rank(r);
+    if (dest_rank == mds->get_nodeid())
+      dispatch_client_request(mdr);
+    else
+      mdcache->request_forward(mdr, dest_rank);
+    return;
+  }
+
+  // give up
+  if (r == -CEPHFS_ENOENT || r == -CEPHFS_ENODATA)
+    r = -CEPHFS_ESTALE;
+  respond_to_request(mdr, r);
+}
+
 class C_MDS_TryFindInode : public ServerContext {
   MDRequestRef mdr;
+  MDCache *mdcache;
+  inodeno_t ino;
 public:
-  C_MDS_TryFindInode(Server *s, MDRequestRef& r) : ServerContext(s), mdr(r) {}
+  C_MDS_TryFindInode(Server *s, MDRequestRef& r, MDCache *m, inodeno_t i) :
+    ServerContext(s), mdr(r), mdcache(m), ino(i) {}
   void finish(int r) override {
-    if (r == -CEPHFS_ESTALE) // :( find_ino_peers failed
-      server->respond_to_request(mdr, r);
-    else
+    if (r == -CEPHFS_ESTALE) { // :( find_ino_peers failed
+      /*
+       * If the MDS crashes and the openfiletable journal
+       * couldn't be flushed, the replacing MDS may not load
+       * some already opened CInodes into its MDCache. When
+       * clients retry requests after reconnecting, the MDS
+       * then returns -ESTALE after failing to find the ino
+       * on any active peer.
+       *
+       * As a workaround, users can run `ls -R ${mountpoint}`
+       * to list all the sub-files and sub-directories under
+       * the mountpoint.
+       *
+       * Instead, try to open the ino here and retry the
+       * request.
+       */
+      CInode *in = mdcache->get_inode(ino);
+      if (in && in->state_test(CInode::STATE_PURGING))
+        server->respond_to_request(mdr, r);
+      else
+        mdcache->open_ino(ino, (int64_t)-1, new C_MDS_TryOpenInode(server, mdr, ino));
+    } else {
       server->dispatch_client_request(mdr);
+    }
   }
 };
 
@@ -3471,8 +3524,8 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr,
     respond_to_request(mdr, r);
   } else if (r == -CEPHFS_ESTALE) {
     dout(10) << "FAIL on CEPHFS_ESTALE but attempting recovery" << dendl;
-    MDSContext *c = new C_MDS_TryFindInode(this, mdr);
-    mdcache->find_ino_peers(refpath.get_ino(), c);
+    inodeno_t ino = refpath.get_ino();
+    mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
   } else {
     dout(10) << "FAIL on error " << r << dendl;
     respond_to_request(mdr, r);
@@ -3557,7 +3610,8 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequestRef& mdr,
   if (r < 0) {
     if (r == -CEPHFS_ESTALE) {
       dout(10) << "FAIL on CEPHFS_ESTALE but attempting recovery" << dendl;
-      mdcache->find_ino_peers(refpath.get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = refpath.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
       return nullptr;
     }
     respond_to_request(mdr, r);
@@ -3642,7 +3696,8 @@ Server::rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn)
   if (r != 0) {
     if (r == -CEPHFS_ESTALE) {
       dout(10) << "CEPHFS_ESTALE on path, attempting recovery" << dendl;
-      mdcache->find_ino_peers(refpath.get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = refpath.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
     } else if (r < 0) {
       respond_to_request(mdr, r);
     }
@@ -3654,7 +3709,8 @@ Server::rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn)
   if (r != 0) {
     if (r == -CEPHFS_ESTALE) {
       dout(10) << "CEPHFS_ESTALE on path2, attempting recovery" << dendl;
-      mdcache->find_ino_peers(refpath2.get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = refpath2.get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
     } else if (r < 0) {
       respond_to_request(mdr, r);
     }
@@ -6873,7 +6929,8 @@ void Server::handle_client_link(MDRequestRef& mdr)
     targeti = mdcache->get_inode(req->get_filepath2().get_ino());
     if (!targeti) {
       dout(10) << "CEPHFS_ESTALE on path2, attempting recovery" << dendl;
-      mdcache->find_ino_peers(req->get_filepath2().get_ino(), new C_MDS_TryFindInode(this, mdr));
+      inodeno_t ino = req->get_filepath2().get_ino();
+      mdcache->find_ino_peers(ino, new C_MDS_TryFindInode(this, mdr, mdcache, ino));
       return;
     }
     mdr->pin(targeti);
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 3d69a536ad6..8729a45c0f8 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -187,6 +187,7 @@ public:
   void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
   void apply_allocated_inos(MDRequestRef& mdr, Session *session);
 
+  void _try_open_ino(MDRequestRef& mdr, int r, inodeno_t ino);
   CInode* rdlock_path_pin_ref(MDRequestRef& mdr, bool want_auth,
                               bool no_want_auth=false);
   CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, bool create,
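
For reviewers who want the new recovery path at a glance, the following is a condensed
sketch of what the Server.cc hunks above do once find_ino_peers() reports -CEPHFS_ESTALE.
It is a restatement of the patch for readability, not code that ships with it; the helper
names on_find_ino_peers_done() and on_open_ino_done() are illustrative stand-ins for
C_MDS_TryFindInode::finish() and Server::_try_open_ino().

// Condensed restatement of the recovery flow added above (illustrative only).
void on_find_ino_peers_done(int r)   // stands in for C_MDS_TryFindInode::finish()
{
  if (r != -CEPHFS_ESTALE) {
    server->dispatch_client_request(mdr);   // find_ino_peers() succeeded: retry the request
    return;
  }
  CInode *in = mdcache->get_inode(ino);
  if (in && in->state_test(CInode::STATE_PURGING)) {
    server->respond_to_request(mdr, r);     // inode is being purged: genuinely stale
  } else {
    // No peer has the ino cached and it is not being purged: try to open it
    // (and learn its auth MDS) before giving up on the request.
    mdcache->open_ino(ino, (int64_t)-1, new C_MDS_TryOpenInode(server, mdr, ino));
  }
}

void on_open_ino_done(int r)         // stands in for Server::_try_open_ino()
{
  if (r >= 0) {                      // r is the authoritative rank for the ino
    if (mds_rank_t(r) == mds->get_nodeid())
      dispatch_client_request(mdr);                   // we are auth: retry locally
    else
      mdcache->request_forward(mdr, mds_rank_t(r));   // forward to the auth MDS
    return;
  }
  if (r == -CEPHFS_ENOENT || r == -CEPHFS_ENODATA)
    r = -CEPHFS_ESTALE;              // the ino really is gone: give up
  respond_to_request(mdr, r);
}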