git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: skip ok-to-stop for mds in upgrade if not enough mds daemons
author: Adam King <adking@redhat.com>
Fri, 14 May 2021 15:44:48 +0000 (11:44 -0400)
committer: Sebastian Wagner <sewagner@redhat.com>
Thu, 17 Jun 2021 08:47:03 +0000 (10:47 +0200)
Fixes: https://tracker.ceph.com/issues/50817
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 2bfc4bdccfc25bc336dc5d6a472b6fd94337beef)

src/pybind/mgr/cephadm/upgrade.py

index 830b1cb1a57b9de8df2bfc75b886443118d4c18f..130db996625874e657a7c2ed73e876056d1775c1 100644 (file)
@@ -433,6 +433,31 @@ class CephadmUpgrade:
         mons = [m['name'] for m in j['monmap']['mons']]
         return len(mons) > 2
 
+    def _enough_mds_for_ok_to_stop(self, mds_daemon: DaemonDescription) -> bool:
+        """Tell whether ok-to-stop is worth checking for ``mds_daemon``: True if
+        its service has more daemons than the fs's max_mds, or no fs matches."""
+        # scan the fs map for the filesystem this mds daemon belongs to
+        fsmap = self.mgr.get("fs_map")
+        for i in fsmap.get('filesystems', []):
+            fs = i["mdsmap"]
+            fs_name = fs["fs_name"]
+
+            assert mds_daemon.daemon_id
+            if fs_name != mds_daemon.service_name().split('.', 1)[1]:
+                # service name suffix (after the first '.') names a different fs
+                continue
+
+            # count the cached daemons that belong to this mds daemon's service
+            mds_count = len(
+                [daemon for daemon in self.mgr.cache.get_daemons_by_service(mds_daemon.service_name())])
+
+            # more daemons than max_mds means there are standbys for this fs
+            if fs["max_mds"] < mds_count:
+                return True
+            return False
+
+        return True  # if mds has no fs it should pass ok-to-stop
+
     def _do_upgrade(self):
         # type: () -> None
         if not self.upgrade_state:
@@ -580,7 +605,7 @@ class CephadmUpgrade:
                         to_upgrade.append(d_entry)
                     continue
 
-                if d.daemon_type in ['osd', 'mds']:
+                if d.daemon_type == 'osd':
                     # NOTE: known_ok_to_stop is an output argument for
                     # _wait_for_ok_to_stop
                     if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
@@ -590,6 +615,10 @@ class CephadmUpgrade:
                     if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
                         return
 
+                if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
+                    if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
+                        return
+
                 to_upgrade.append(d_entry)
 
                 # if we don't have a list of others to consider, stop now