From cb8a612d83a5a67f388fe40fa3d8dcefc5accc00 Mon Sep 17 00:00:00 2001 From: Daniel Pivonka Date: Mon, 28 Jun 2021 10:12:13 -0400 Subject: [PATCH] mgr/cephadm: add ceph orch host drain and limit host removal to empty hosts ceph orch host drain removes all daemons from a host so it can be safely removed ceph orch host rm will only remove hosts that are safe to remove Signed-off-by: Daniel Pivonka --- doc/cephadm/host-management.rst | 39 ++++++++--------------- doc/cephadm/osd.rst | 1 + src/pybind/mgr/cephadm/module.py | 38 ++++++++++++++++++++++ src/pybind/mgr/orchestrator/_interface.py | 8 +++++ src/pybind/mgr/orchestrator/module.py | 7 ++++ 5 files changed, 67 insertions(+), 26 deletions(-) diff --git a/doc/cephadm/host-management.rst b/doc/cephadm/host-management.rst index bcf626752fca9..35829fa4ac63d 100644 --- a/doc/cephadm/host-management.rst +++ b/doc/cephadm/host-management.rst @@ -64,48 +64,35 @@ To add each new host to the cluster, perform two steps: Removing Hosts ============== -If the node that want you to remove is running OSDs, make sure you remove the OSDs from the node. +A host can safely be removed from the cluster once all daemons are removed from it. -To remove a host from a cluster, do the following: +To drain all daemons from a host do the following: -For all Ceph service types, except for ``node-exporter`` and ``crash``, remove -the host from the placement specification file (for example, cluster.yml). -For example, if you are removing the host named host2, remove all occurrences of -``- host2`` from all ``placement:`` sections. - -Update: +.. prompt:: bash # -.. code-block:: yaml + ceph orch host drain *<host>* - service_type: rgw - placement: - hosts: - - host1 - - host2 +The '_no_schedule' label will be applied to the host. See :ref:`cephadm-special-host-labels` -To: +All osds on the host will be scheduled to be removed. You can check osd removal progress with the following: -.. code-block:: yaml +.. 
prompt:: bash # + ceph orch osd rm status - service_type: rgw - placement: - hosts: - - host1 +See :ref:`cephadm-osd-removal` for more details about osd removal -Remove the host from cephadm's environment: +You can check if there are no daemons left on the host with the following: .. prompt:: bash # - ceph orch host rm host2 - + ceph orch ps -If the host is running ``node-exporter`` and crash services, remove them by running -the following command on the host: +Once all daemons are removed you can remove the host with the following: .. prompt:: bash # - cephadm rm-daemon --fsid CLUSTER_ID --name SERVICE_NAME + ceph orch host rm <host> .. _orchestrator-host-labels: diff --git a/doc/cephadm/osd.rst b/doc/cephadm/osd.rst index 5c01d038f84fa..47af05991660e 100644 --- a/doc/cephadm/osd.rst +++ b/doc/cephadm/osd.rst @@ -211,6 +211,7 @@ If you want to avoid this behavior (disable automatic creation of OSD on availab * For cephadm, see also :ref:`cephadm-spec-unmanaged`. +.. _cephadm-osd-removal: Remove an OSD ============= diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 39a470c9dbae8..0cdc5abfcbf56 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1463,6 +1463,22 @@ Then run the following: :param host: host name """ + # Verify if it is possible to remove the host safely + daemons = self.cache.get_daemons_by_host(host) + if daemons: + self.log.warning(f"Blocked {host} removal. Daemons running: {daemons}") + + daemons_table = "" + daemons_table += "{:<20} {:<15}\n".format("type", "id") + daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15) + for d in daemons: + daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id) + + return "Not allowed to remove %s from cluster. 
" \ + "The following daemons are running in the host:" \ + "\n%s\nPlease run 'ceph orch host drain %s' to remove daemons from host" % ( + host, daemons_table, host) + self.inventory.rm_host(host) self.cache.rm_host(host) self._reset_con(host) @@ -2564,3 +2580,25 @@ Then run the following: The CLI call to retrieve an osd removal report """ return self.to_remove_osds.all_osds() + + @handle_orch_error + def drain_host(self, hostname): + # type: (str) -> str + """ + Drain all daemons from a host. + :param host: host name + """ + self.add_host_label(hostname, '_no_schedule') + + daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_host(hostname) + + osds_to_remove = [d.daemon_id for d in daemons if d.daemon_type == 'osd'] + self.remove_osds(osds_to_remove) + + daemons_table = "" + daemons_table += "{:<20} {:<15}\n".format("type", "id") + daemons_table += "{:<20} {:<15}\n".format("-" * 20, "-" * 15) + for d in daemons: + daemons_table += "{:<20} {:<15}\n".format(d.daemon_type, d.daemon_id) + + return "Scheduled to remove the following daemons from host '{}'\n{}".format(hostname, daemons_table) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 9c18351599f3f..169513a50d5ef 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -351,6 +351,14 @@ class Orchestrator(object): """ raise NotImplementedError() + def drain_host(self, hostname: str) -> OrchResult[str]: + """ + drain all daemons from a host + + :param hostname: hostname + """ + raise NotImplementedError() + def update_host_addr(self, host: str, addr: str) -> OrchResult[str]: """ Update a host's address diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 691ddfd398988..4ca6281bc75f8 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -352,6 +352,13 @@ class OrchestratorCli(OrchestratorClientMixin, 
MgrModule, raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) + @_cli_write_command('orch host drain') + def _drain_host(self, hostname: str) -> HandleCommandResult: + """drain all daemons from a host""" + completion = self.drain_host(hostname) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + @_cli_write_command('orch host set-addr') def _update_set_addr(self, hostname: str, addr: str) -> HandleCommandResult: """Update a host address""" -- 2.39.5