From: Sage Weil
Date: Fri, 28 Jan 2011 20:35:38 +0000 (-0800)
Subject: mds: defer sending resolves until mdsmap.failed.empty()
X-Git-Tag: v0.25~231^2~43^2~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f711508b27f5d832c361276989c97ccda205091c;p=ceph.git

mds: defer sending resolves until mdsmap.failed.empty()

There is no point sending resolves while there are still failed nodes,
since we can't complete. We also trigger an assert if we try to send to
a failed node. Instead just wait until failed.empty() and then start.

Signed-off-by: Sage Weil
---

diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index efce6e6b61f4..0a1d2f7b0ea1 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -958,7 +958,8 @@ void MDS::handle_mds_map(MMDSMap *m)
     if (oldresolve != resolve) {
       dout(10) << " resolve set is " << resolve << ", was " << oldresolve << dendl;
       calc_recovery_set();
-      mdcache->send_resolves();
+      if (!mdsmap->is_any_failed())
+	mdcache->send_resolves();
     }
   }
 
diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h
index bc1015f2712b..7bcdd272bb09 100644
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -410,6 +410,9 @@ public:
       get_num_mds(STATE_REJOIN) +
       failed.size();
   }
+  bool is_any_failed() {
+    return failed.size();
+  }
   bool is_rejoining() {
     // nodes are rejoining cache state
     return