]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: still remove daemons in error state if ok-to-stop fails
authorAdam King <adking@redhat.com>
Fri, 28 Jul 2023 17:24:06 +0000 (13:24 -0400)
committerAdam King <adking@redhat.com>
Tue, 8 Aug 2023 14:49:28 +0000 (10:49 -0400)
The ok-to-stop function works for certain daemons
by checking if there are at least a certain number
(typically 1) of daemons that are actually running
and saying it's not ok-to-stop if that won't
be true after the removals. This case breaks down
when all the daemons are in error state, making
it so cephadm will refuse to remove a set of
daemons that aren't even working because they're
not "ok to stop". Since ok-to-stop works in a
yes or no fashion, and in a case like this we
want to be willing to remove a certain subset of
(or potentially all) the currently deployed daemons,
it's easier to keep this logic as part of applying
the service.

Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/serve.py

index f6a5425f2a14518907781b24e7d62c40c7db6148..1f108415a061632eab76f8bda58d1863d262aa3b 100644 (file)
@@ -920,7 +920,18 @@ class CephadmServe:
 
             while daemons_to_remove and not _ok_to_stop(daemons_to_remove):
                 # let's find a subset that is ok-to-stop
-                daemons_to_remove.pop()
+                non_error_daemon_index = -1
+                # prioritize removing daemons in error state
+                for i, dmon in enumerate(daemons_to_remove):
+                    if dmon.status != DaemonDescriptionStatus.error:
+                        non_error_daemon_index = i
+                        break
+                if non_error_daemon_index != -1:
+                    daemons_to_remove.pop(non_error_daemon_index)
+                else:
+                    # all daemons in list are in error state
+                    # we should be able to remove all of them
+                    break
             for d in daemons_to_remove:
                 r = True
                 assert d.hostname is not None