From 2bfc4bdccfc25bc336dc5d6a472b6fd94337beef Mon Sep 17 00:00:00 2001
From: Adam King
Date: Fri, 14 May 2021 11:44:48 -0400
Subject: [PATCH] mgr/cephadm: skip ok-to-stop for mds in upgrade if not enough mds daemons

Fixes: https://tracker.ceph.com/issues/50817
Signed-off-by: Adam King
---
 src/pybind/mgr/cephadm/upgrade.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
index 830b1cb1a57..130db996625 100644
--- a/src/pybind/mgr/cephadm/upgrade.py
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -433,6 +433,35 @@ class CephadmUpgrade:
         mons = [m['name'] for m in j['monmap']['mons']]
         return len(mons) > 2
 
+    def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool:
+        """Return whether ok-to-stop should be checked before upgrading an mds.
+
+        :param mds_daemon: the mds daemon that is about to be upgraded
+        :return: True if the daemon's fs has more mds daemons than max_mds
+            (i.e. standbys exist) or if no fs matches the daemon; False if
+            the fs has max_mds or fewer mds daemons.
+        """
+        # find fs this mds daemon belongs to
+        fsmap = self.mgr.get("fs_map")
+        for i in fsmap.get('filesystems', []):
+            fs = i["mdsmap"]
+            fs_name = fs["fs_name"]
+
+            assert mds_daemon.daemon_id
+            service_name = mds_daemon.service_name()
+            # the part after the first '.' is compared against the fs name
+            if fs_name != service_name.split('.', 1)[1]:
+                # wrong fs for this mds daemon
+                continue
+
+            # get number of mds daemons for this fs
+            mds_count = len(list(self.mgr.cache.get_daemons_by_service(service_name)))
+
+            # standby mds daemons for this fs?
+            return fs["max_mds"] < mds_count
+
+        return True  # if mds has no fs it should pass ok-to-stop
+
     def _do_upgrade(self):
         # type: () -> None
         if not self.upgrade_state:
@@ -580,7 +609,7 @@ class CephadmUpgrade:
                         to_upgrade.append(d_entry)
                     continue
 
-                if d.daemon_type in ['osd', 'mds']:
+                if d.daemon_type == 'osd':
                     # NOTE: known_ok_to_stop is an output argument for
                     # _wait_for_ok_to_stop
                     if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
@@ -590,6 +619,10 @@ class CephadmUpgrade:
                     if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                         return
 
+                if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
+                    if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+                        return
+
                 to_upgrade.append(d_entry)
 
                 # if we don't have a list of others to consider, stop now
-- 
2.39.5