git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: just respawn mds daemon when osd op requests timeout 42072/head
author: Xiubo Li <xiubli@redhat.com>
Thu, 24 Jun 2021 06:41:10 +0000 (14:41 +0800)
committer: Patrick Donnelly <pdonnell@redhat.com>
Mon, 28 Jun 2021 19:53:39 +0000 (12:53 -0700)
Fixes: https://tracker.ceph.com/issues/51280
Signed-off-by: Xiubo Li <xiubli@redhat.com>
(cherry picked from commit c854a4eea44a631079dfe481c235a323fae54b74)

src/mds/MDSContext.cc

index 8c04586f212e0b62e2e015e7a26c749c03c0efdb..fcf6f764fe4c1cf2538a9f89b547f475eca953d6 100644 (file)
@@ -107,8 +107,11 @@ void MDSIOContextBase::complete(int r) {
     return;
   }
 
-  if (r == -CEPHFS_EBLOCKLISTED) {
-    derr << "MDSIOContextBase: blocklisted!  Restarting..." << dendl;
+  // It's possible that the osd op requests get stuck and then time out
+  // after "rados_osd_op_timeout". The mds won't know what to do about it,
+  // so just respawn it.
+  if (r == -CEPHFS_EBLOCKLISTED || r == -CEPHFS_ETIMEDOUT) {
+    derr << "MDSIOContextBase: failed with " << r << ", restarting..." << dendl;
     mds->respawn();
   } else {
     MDSContext::complete(r);