git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
pybind/mgr/cephadm: upgrade MDS if no MDS is "up" 43728/head
author Patrick Donnelly <pdonnell@redhat.com>
Thu, 28 Oct 2021 00:26:55 +0000 (20:26 -0400)
committer Sebastian Wagner <sewagner@redhat.com>
Tue, 2 Nov 2021 09:02:51 +0000 (10:02 +0100)
The upgrade process can get stuck if an MDS crashes. This should be rare
when straddling v16.2.5, where the compatset of the file system inherits
the FSMap "default". MDS daemons from pre-v16.2.5 releases do not yet
share a compatset with the mons, so the mons will do no promotions,
causing the upgrade task to get stuck.

Fixes: https://tracker.ceph.com/issues/53074
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
(cherry picked from commit d7717256644e45ba670165c6af0941461fb884f4)

src/pybind/mgr/cephadm/upgrade.py

index f6b638b466328d2c915a073277de57d08f714c0f..374be6bd23c59fe97fb8e4c7999662b5284b3ec4 100644 (file)
@@ -441,23 +441,31 @@ class CephadmUpgrade:
                 continue_upgrade = False
                 continue
 
-            if not (mdsmap['in'] == [0] and len(mdsmap['up']) == 1):
+            if not (mdsmap['in'] == [0] and len(mdsmap['up']) <= 1):
                 self.mgr.log.info('Upgrade: Waiting for fs %s to scale down to reach 1 MDS' % (fs_name))
                 time.sleep(10)
                 continue_upgrade = False
                 continue
 
-            mdss = list(mdsmap['info'].values())
-            assert len(mdss) == 1
-            lone_mds = mdss[0]
-            if lone_mds['state'] != 'up:active':
-                self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % (
-                    lone_mds['name'],
-                    lone_mds['state'],
-                ))
-                time.sleep(10)
-                continue_upgrade = False
-                continue
+            if len(mdsmap['up']) == 0:
+                self.mgr.log.warning("Upgrade: No mds is up; continuing upgrade procedure to poke things in the right direction")
+                # This can happen because the current version MDS have
+                # incompatible compatsets; the mons will not do any promotions.
+                # We must upgrade to continue.
+            elif len(mdsmap['up']) > 0:
+                mdss = list(mdsmap['info'].values())
+                assert len(mdss) == 1
+                lone_mds = mdss[0]
+                if lone_mds['state'] != 'up:active':
+                    self.mgr.log.info('Upgrade: Waiting for mds.%s to be up:active (currently %s)' % (
+                        lone_mds['name'],
+                        lone_mds['state'],
+                    ))
+                    time.sleep(10)
+                    continue_upgrade = False
+                    continue
+            else:
+                assert False
 
         return continue_upgrade