From: Patrick Donnelly Date: Sat, 18 Sep 2021 00:15:01 +0000 (-0400) Subject: pybind/mgr/cephadm: disable allow_standby_replay during CephFS upgrade X-Git-Tag: v16.2.7~74^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dcb3455fdfba83a64be3b0a030ef468acfcf6fe6;p=ceph.git pybind/mgr/cephadm: disable allow_standby_replay during CephFS upgrade Following procedure in [1]. Also: harden checks for active. Ensure "up" and "in" are both [0]. There should be no standby-replay daemon. [1] https://docs.ceph.com/en/pacific/cephfs/upgrading/ Fixes: https://tracker.ceph.com/issues/52654 Signed-off-by: Patrick Donnelly (cherry picked from commit bca21f01ce3bb32e0951f0fe15da88a81750a191) --- diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index c08cd3a22570..d05459d14090 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -16,6 +16,9 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# from ceph_fs.h +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5) + def normalize_image_digest(digest: str, default_registry: str) -> str: # normal case: @@ -41,6 +44,7 @@ class UpgradeState: error: Optional[str] = None, paused: Optional[bool] = None, fs_original_max_mds: Optional[Dict[str, int]] = None, + fs_original_allow_standby_replay: Optional[Dict[str, bool]] = None ): self._target_name: str = target_name # Use CephadmUpgrade.target_image instead. self.progress_id: str = progress_id @@ -50,6 +54,7 @@ class UpgradeState: self.error: Optional[str] = error self.paused: bool = paused or False self.fs_original_max_mds: Optional[Dict[str, int]] = fs_original_max_mds + self.fs_original_allow_standby_replay: Optional[Dict[str, bool]] = fs_original_allow_standby_replay def to_json(self) -> dict: return { @@ -59,6 +64,7 @@ class UpgradeState: 'target_digests': self.target_digests, 'target_version': self.target_version, 'fs_original_max_mds': self.fs_original_max_mds, + 'fs_original_allow_standby_replay': self.fs_original_allow_standby_replay, 'error': self.error, 'paused': self.paused, } @@ -360,20 +366,39 @@ class CephadmUpgrade: assert self.upgrade_state if not self.upgrade_state.fs_original_max_mds: self.upgrade_state.fs_original_max_mds = {} + if not self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay = {} fsmap = self.mgr.get("fs_map") continue_upgrade = True - for i in fsmap.get('filesystems', []): - fs = i["mdsmap"] - fs_id = i["id"] - fs_name = fs["fs_name"] + for fs in fsmap.get('filesystems', []): + fscid = fs["id"] + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] + + # disable allow_standby_replay? + if mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY: + self.mgr.log.info('Upgrade: Disabling standby-replay for filesystem %s' % ( + fs_name + )) + if fscid not in self.upgrade_state.fs_original_allow_standby_replay: + self.upgrade_state.fs_original_allow_standby_replay[fscid] = True + self._save_upgrade_state() + ret, out, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'allow_standby_replay', + 'val': '0', + }) + continue_upgrade = False + continue # scale down this filesystem? - if fs["max_mds"] > 1: + if mdsmap["max_mds"] > 1: self.mgr.log.info('Upgrade: Scaling down filesystem %s' % ( fs_name )) - if fs_id not in self.upgrade_state.fs_original_max_mds: - self.upgrade_state.fs_original_max_mds[fs_id] = fs['max_mds'] + if fscid not in self.upgrade_state.fs_original_max_mds: + self.upgrade_state.fs_original_max_mds[fscid] = mdsmap['max_mds'] self._save_upgrade_state() ret, out, err = self.mgr.check_mon_command({ 'prefix': 'fs set', @@ -384,13 +409,15 @@ class CephadmUpgrade: continue_upgrade = False continue - if len(fs['info']) > 1: - self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to 1 MDS' % (fs_name)) + if not (mdsmap['in'] == [0] and len(mdsmap['up']) == 1): + self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name)) time.sleep(10) continue_upgrade = False continue - lone_mds = list(fs['info'].values())[0] + mdss = list(mdsmap['info'].values()) + assert len(mdss) == 1 + lone_mds = mdss[0] if lone_mds['state'] != 'up:active': self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % ( lone_mds['name'], @@ -420,9 +447,9 @@ class CephadmUpgrade: # find fs this mds daemon belongs to fsmap = self.mgr.get("fs_map") - for i in fsmap.get('filesystems', []): - fs = i["mdsmap"] - fs_name = fs["fs_name"] + for fs in fsmap.get('filesystems', []): + mdsmap = fs["mdsmap"] + fs_name = mdsmap["fs_name"] assert mds_daemon.daemon_id if fs_name != mds_daemon.service_name().split('.', 1)[1]: @@ -434,7 +461,7 @@ class CephadmUpgrade: [daemon for daemon in self.mgr.cache.get_daemons_by_service(mds_daemon.service_name())]) # standby mds daemons for this fs? - if fs["max_mds"] < mds_count: + if mdsmap["max_mds"] < mds_count: return True return False @@ -753,24 +780,43 @@ class CephadmUpgrade: }) # complete mds upgrade? - if daemon_type == 'mds' and self.upgrade_state.fs_original_max_mds: - for i in self.mgr.get("fs_map")['filesystems']: - fs_id = i["id"] - fs_name = i['mdsmap']['fs_name'] - new_max = self.upgrade_state.fs_original_max_mds.get(fs_id) - if new_max: - self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % ( - fs_name, new_max - )) - ret, _, err = self.mgr.check_mon_command({ - 'prefix': 'fs set', - 'fs_name': fs_name, - 'var': 'max_mds', - 'val': str(new_max), - }) - - self.upgrade_state.fs_original_max_mds = {} - self._save_upgrade_state() + if daemon_type == 'mds': + if self.upgrade_state.fs_original_max_mds: + for fs in self.mgr.get("fs_map")['filesystems']: + fscid = fs["id"] + fs_name = fs['mdsmap']['fs_name'] + new_max = self.upgrade_state.fs_original_max_mds.get(fscid, 1) + if new_max > 1: + self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % ( + fs_name, new_max + )) + ret, _, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'max_mds', + 'val': str(new_max), + }) + + self.upgrade_state.fs_original_max_mds = {} + self._save_upgrade_state() + if self.upgrade_state.fs_original_allow_standby_replay: + for fs in self.mgr.get("fs_map")['filesystems']: + fscid = fs["id"] + fs_name = fs['mdsmap']['fs_name'] + asr = self.upgrade_state.fs_original_allow_standby_replay.get(fscid, False) + if asr: + self.mgr.log.info('Upgrade: Enabling allow_standby_replay on filesystem %s' % ( + fs_name + )) + ret, _, err = self.mgr.check_mon_command({ + 'prefix': 'fs set', + 'fs_name': fs_name, + 'var': 'allow_standby_replay', + 'val': '1' + }) + + self.upgrade_state.fs_original_allow_standby_replay = {} + self._save_upgrade_state() # clean up logger.info('Upgrade: Finalizing container_image settings')