mgr/cephadm/upgrade: scale down MDS cluster(s) for major version upgrades
author Sage Weil <sage@newdream.net>
Tue, 26 Jan 2021 22:10:13 +0000 (16:10 -0600)
committer Sage Weil <sage@newdream.net>
Mon, 1 Feb 2021 23:20:14 +0000 (17:20 -0600)
For octopus -> pacific, as with other recent releases, we need to scale
down the MDS cluster(s) to a single daemon before upgrading.  (This is
because the MDS intra-cluster protocols aren't fully versioned.)

Signed-off-by: Sage Weil <sage@newdream.net>
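
The change below automates what an operator otherwise does by hand before a
major MDS upgrade. As a rough illustration only (a minimal standalone sketch
driving the `ceph` CLI from Python; the helper names are hypothetical and not
part of this patch): record each filesystem's max_mds, drop it to 1 so only a
single MDS stays active, then restore the saved values once the daemons have
been upgraded.

# Hypothetical sketch of the manual procedure this patch automates inside
# cephadm; assumes the `ceph` CLI is on PATH and the cluster is reachable.
import json
import subprocess
from typing import Dict


def ceph(*args: str) -> str:
    # Thin wrapper around the `ceph` command-line tool.
    return subprocess.check_output(('ceph',) + args, text=True)


def scale_down_filesystems() -> Dict[str, int]:
    # Remember each filesystem's original max_mds and drop it to 1.
    original: Dict[str, int] = {}
    fsmap = json.loads(ceph('fs', 'dump', '--format=json'))
    for fs in fsmap.get('filesystems', []):
        name = fs['mdsmap']['fs_name']
        max_mds = fs['mdsmap']['max_mds']
        if max_mds > 1:
            original[name] = max_mds
            ceph('fs', 'set', name, 'max_mds', '1')
    return original


def restore_filesystems(original: Dict[str, int]) -> None:
    # After the MDS daemons are upgraded, put max_mds back where it was.
    for name, max_mds in original.items():
        ceph('fs', 'set', name, 'max_mds', str(max_mds))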
src/pybind/mgr/cephadm/upgrade.py

index bd9186bbe3f6926993628a6b8565693a69f0d39b..1cc2cce6fde1a02f529987626c75767a2036a21e 100644 (file)
@@ -25,6 +25,7 @@ class UpgradeState:
                  target_version: Optional[str] = None,
                  error: Optional[str] = None,
                  paused: Optional[bool] = None,
+                 fs_original_max_mds: Optional[Dict[str,int]] = None,
                  ):
         self._target_name: str = target_name  # Use CephadmUpgrade.target_image instead.
         self.progress_id: str = progress_id
@@ -33,6 +34,7 @@ class UpgradeState:
         self.target_version: Optional[str] = target_version
         self.error: Optional[str] = error
         self.paused: bool = paused or False
+        self.fs_original_max_mds: Optional[Dict[str,int]] = fs_original_max_mds
 
     def to_json(self) -> dict:
         return {
@@ -41,6 +43,7 @@ class UpgradeState:
             'target_id': self.target_id,
             'target_digests': self.target_digests,
             'target_version': self.target_version,
+            'fs_original_max_mds': self.fs_original_max_mds,
             'error': self.error,
             'paused': self.paused,
         }
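
One serialization detail worth keeping in mind when reading the from_json
counterpart (not shown in this hunk; an aside, not part of the patch): the
code stores the integer filesystem id as the key of fs_original_max_mds, and
if the upgrade state is persisted as JSON, integer dict keys round-trip as
strings unless they are converted back. A minimal demonstration:

import json

state = {'fs_original_max_mds': {1: 2}}        # keyed by integer fs id
restored = json.loads(json.dumps(state))
print(restored['fs_original_max_mds'])         # {'1': 2} -- keys became strings
print(restored['fs_original_max_mds'].get(1))  # None: integer lookup now misses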
@@ -274,6 +277,75 @@ class CephadmUpgrade:
                 image_settings[opt['section']] = opt['value']
         return image_settings
 
+    def _prepare_for_mds_upgrade(
+        self,
+        target_major: str,
+        need_upgrade: List[DaemonDescription]
+    ) -> bool:
+        # are any daemons running a different major version?
+        scale_down = False
+        for name, info in self.mgr.get("mds_metadata").items():
+            version = info.get("ceph_version_short")
+            major_version = None
+            if version:
+                major_version = version.split('.')[0]
+            if not major_version:
+                self.mgr.log.info('Upgrade: mds.%s version is not known, will retry' % name)
+                time.sleep(5)
+                return False
+            if int(major_version) < int(target_major):
+                scale_down = True
+
+        if not scale_down:
+            self.mgr.log.debug('Upgrade: All MDS daemons run same major version')
+            return True
+
+        # scale down all filesystems to 1 MDS
+        assert self.upgrade_state
+        if not self.upgrade_state.fs_original_max_mds:
+            self.upgrade_state.fs_original_max_mds = {}
+        fsmap = self.mgr.get("fs_map")
+        continue_upgrade = True
+        for i in fsmap.get('filesystems', []):
+            fs = i["mdsmap"]
+            fs_id = i["id"]
+            fs_name = fs["fs_name"]
+
+            # scale down this filesystem?
+            if fs["max_mds"] > 1:
+                self.mgr.log.info('Upgrade: Scaling down filesystem %s' % (
+                    fs_name
+                ))
+                if fs_id not in self.upgrade_state.fs_original_max_mds:
+                    self.upgrade_state.fs_original_max_mds[fs_id] = fs['max_mds']
+                    self._save_upgrade_state()
+                ret, out, err = self.mgr.check_mon_command({
+                    'prefix': 'fs set',
+                    'fs_name': fs_name,
+                    'var': 'max_mds',
+                    'val': '1',
+                })
+                continue_upgrade = False
+                continue
+
+            if len(fs['info']) > 1:
+                self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to 1 MDS' % (fs_name))
+                time.sleep(10)
+                continue_upgrade = False
+                continue
+
+            lone_mds = list(fs['info'].values())[0]
+            if lone_mds['state'] != 'up:active':
+                self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % (
+                    lone_mds['name'],
+                    lone_mds['state'],
+                ))
+                time.sleep(10)
+                continue_upgrade = False
+                continue
+
+        return continue_upgrade
+
     def _do_upgrade(self):
         # type: () -> None
         if not self.upgrade_state:
@@ -339,7 +411,9 @@ class CephadmUpgrade:
         done = 0
         for daemon_type in CEPH_UPGRADE_ORDER:
             logger.info('Upgrade: Checking %s daemons' % daemon_type)
+
             need_upgrade_self = False
+            need_upgrade = []
             for d in daemons:
                 if d.daemon_type != daemon_type:
                     continue
@@ -354,7 +428,6 @@ class CephadmUpgrade:
 
                 assert d.daemon_type is not None
                 assert d.daemon_id is not None
-                assert d.hostname is not None
 
                 if self.mgr.daemon_is_self(d.daemon_type, d.daemon_id):
                     logger.info('Upgrade: Need to upgrade myself (mgr.%s)' %
@@ -362,6 +435,21 @@ class CephadmUpgrade:
                     need_upgrade_self = True
                     continue
 
+                need_upgrade.append(d)
+
+            # prepare filesystems for daemon upgrades?
+            if (
+                daemon_type == 'mds'
+                and need_upgrade
+                and not self._prepare_for_mds_upgrade(target_major, need_upgrade)
+            ):
+                return
+
+            for d in need_upgrade:
+                assert d.daemon_type is not None
+                assert d.daemon_id is not None
+                assert d.hostname is not None
+
                 # make sure host has latest container image
                 out, errs, code = CephadmServe(self.mgr)._run_cephadm(
                     d.hostname, '', 'inspect-image', [],
@@ -487,6 +575,26 @@ class CephadmUpgrade:
                         'release': target_major_name,
                     })
 
+            # complete mds upgrade?
+            if daemon_type == 'mds' and self.upgrade_state.fs_original_max_mds:
+                for i in self.mgr.get("fs_map")['filesystems']:
+                    fs_id = i["id"]
+                    fs_name = i['mdsmap']['fs_name']
+                    new_max = self.upgrade_state.fs_original_max_mds.get(fs_id)
+                    if new_max:
+                        self.mgr.log.info('Upgrade: Scaling up filesystem %s max_mds to %d' % (
+                            fs_name, new_max
+                        ))
+                        ret, _, err = self.mgr.check_mon_command({
+                            'prefix': 'fs set',
+                            'fs_name': fs_name,
+                            'var': 'max_mds',
+                            'val': str(new_max),
+                        })
+
+                self.upgrade_state.fs_original_max_mds = {}
+                self._save_upgrade_state()
+
         # clean up
         logger.info('Upgrade: Finalizing container_image settings')
         self.mgr.set_container_image('global', target_image)