]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: just respawn mds daemon when osd op requests timeout 43785/head
author Xiubo Li <xiubli@redhat.com>
Wed, 3 Nov 2021 06:27:02 +0000 (14:27 +0800)
committer Xiubo Li <xiubli@redhat.com>
Thu, 4 Nov 2021 00:43:06 +0000 (08:43 +0800)
Fixes: https://tracker.ceph.com/issues/51280
Signed-off-by: Xiubo Li <xiubli@redhat.com>
src/mds/MDSContext.cc

index 94726a2cb4f45d2c77b7e30be5a163c0ed2aff35..f96fc54202a78980f878a5dd312897f0e0343833 100644 (file)
@@ -107,8 +107,11 @@ void MDSIOContextBase::complete(int r) {
     return;
   }
 
-  if (r == -EBLACKLISTED) {
-    derr << "MDSIOContextBase: blacklisted!  Restarting..." << dendl;
+  // An OSD op request can get stuck and then time out after
+  // "rados_osd_op_timeout". The MDS cannot know how to recover from that,
+  // so just respawn it.
+  if (r == -EBLACKLISTED || r == -ETIMEDOUT) {
+    derr << "MDSIOContextBase: failed with " << r << ", restarting..." << dendl;
     mds->respawn();
   } else {
     MDSContext::complete(r);