From: Xiubo Li Date: Thu, 24 Jun 2021 06:41:10 +0000 (+0800) Subject: mds: just respawn mds daemon when osd op requests timeout X-Git-Tag: v16.2.5~15^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F42072%2Fhead;p=ceph.git mds: just respawn mds daemon when osd op requests timeout Fixes: https://tracker.ceph.com/issues/51280 Signed-off-by: Xiubo Li (cherry picked from commit c854a4eea44a631079dfe481c235a323fae54b74) --- diff --git a/src/mds/MDSContext.cc b/src/mds/MDSContext.cc index 8c04586f212e..fcf6f764fe4c 100644 --- a/src/mds/MDSContext.cc +++ b/src/mds/MDSContext.cc @@ -107,8 +107,11 @@ void MDSIOContextBase::complete(int r) { return; } - if (r == -CEPHFS_EBLOCKLISTED) { - derr << "MDSIOContextBase: blocklisted! Restarting..." << dendl; + // It's possible that the osd op requests will get stuck and then time out + // after "rados_osd_op_timeout". The mds won't know what to do about it, so + // just respawn it. + if (r == -CEPHFS_EBLOCKLISTED || r == -CEPHFS_ETIMEDOUT) { + derr << "MDSIOContextBase: failed with " << r << ", restarting..." << dendl; mds->respawn(); } else { MDSContext::complete(r);