From c18ad7c8a3e63e6eb26947bb7ab47078ebfd0d60 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 15 Jun 2020 18:12:42 +0200 Subject: [PATCH] mgr/cephadm: Manage /etc/ceph/ceph.conf Signed-off-by: Sebastian Wagner --- src/pybind/mgr/cephadm/inventory.py | 22 ++++++++ src/pybind/mgr/cephadm/module.py | 54 +++++++++++++++++--- src/pybind/mgr/cephadm/tests/test_cephadm.py | 23 +++++++++ src/pybind/mgr/tests/__init__.py | 1 + 4 files changed, 93 insertions(+), 7 deletions(-) diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 677b669e20ea4..d3ac260930d1f 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -175,6 +175,7 @@ class HostCache(): self.daemon_config_deps = {} # type: Dict[str, Dict[str, Dict[str,Any]]] self.last_host_check = {} # type: Dict[str, datetime.datetime] self.loading_osdspec_preview = set() # type: Set[str] + self.etc_ceph_ceph_conf_refresh_queue: Set[str] = set() def load(self): # type: () -> None @@ -216,6 +217,7 @@ class HostCache(): if 'last_host_check' in j: self.last_host_check[host] = datetime.datetime.strptime( j['last_host_check'], DATEFMT) + self.etc_ceph_ceph_conf_refresh_queue.add(host) self.mgr.log.debug( 'HostCache.load: host %s has %d daemons, ' '%d devices, %d networks' % ( @@ -260,6 +262,7 @@ class HostCache(): self.daemon_refresh_queue.append(host) self.device_refresh_queue.append(host) self.osdspec_previews_refresh_queue.append(host) + self.etc_ceph_ceph_conf_refresh_queue.add(host) def invalidate_host_daemons(self, host): # type: (str) -> None @@ -275,6 +278,9 @@ class HostCache(): del self.last_device_update[host] self.mgr.event.set() + def distribute_new_etc_ceph_ceph_conf(self): + self.etc_ceph_ceph_conf_refresh_queue = set(self.mgr.inventory.keys()) + def save_host(self, host): # type: (str) -> None j = { # type: ignore @@ -420,6 +426,22 @@ class HostCache(): seconds=self.mgr.host_check_interval) return host not in self.last_host_check or self.last_host_check[host] < cutoff + def host_needs_new_etc_ceph_ceph_conf(self, host): + if not self.mgr.manage_etc_ceph_ceph_conf: + return False + if self.mgr.paused: + return False + if host in self.mgr.offline_hosts: + return False + if host in self.etc_ceph_ceph_conf_refresh_queue: + # We're read-only here. + # self.etc_ceph_ceph_conf_refresh_queue.remove(host) + return True + return False + + def remove_host_needs_new_etc_ceph_ceph_conf(self, host): + self.etc_ceph_ceph_conf_refresh_queue.remove(host) + def add_daemon(self, host, dd): # type: (str, orchestrator.DaemonDescription) -> None assert host in self.daemons diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index b6eaf428c8434..fe58d866d2a5a 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -253,7 +253,13 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): 'type': 'bool', 'default': True, 'desc': 'manage configs like API endpoints in Dashboard.' - } + }, + { + 'name': 'manage_etc_ceph_ceph_conf', + 'type': 'bool', + 'default': False, + 'desc': 'Manage and own /etc/ceph/ceph.conf on the hosts.', + }, ] def __init__(self, *args, **kwargs): @@ -288,6 +294,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self.prometheus_alerts_path = '' self.migration_current = None self.config_dashboard = True + self.manage_etc_ceph_ceph_conf = True self._cons = {} # type: Dict[str, Tuple[remoto.backends.BaseConnection,remoto.backends.LegacyModuleExecute]] @@ -547,10 +554,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self.event.set() def config_notify_one(self, what): - pass + if what == 'manage_etc_ceph_ceph_conf' and self.manage_etc_ceph_ceph_conf: + self.cache.distribute_new_etc_ceph_ceph_conf() def notify(self, notify_type, notify_id): - pass + if notify_type == "mon_map": + self.cache.distribute_new_etc_ceph_ceph_conf() def pause(self): if not self.paused: @@ -909,7 +918,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): def _remote_connection(self, host: str, addr: Optional[str]=None, - ) -> Iterator[Tuple[BaseConnection, Any]]: + ) -> Iterator[Tuple["BaseConnection", Any]]: if not addr and host in self.inventory: addr = self.inventory.get_addr(host) @@ -1171,9 +1180,15 @@ you may want to run: r = self._refresh_host_osdspec_previews(host) if r: failures.append(r) - - refresh(self.cache.get_hosts()) + if self.cache.host_needs_new_etc_ceph_ceph_conf(host): + self.log.debug(f"deploying new /etc/ceph/ceph.conf on `{host}`") + r = self._deploy_etc_ceph_ceph_conf(host) + if r: + bad_hosts.append(r) + + refresh(self.cache.get_hosts()) + health_changed = False if 'CEPHADM_HOST_CHECK_FAILED' in self.health_checks: del self.health_checks['CEPHADM_HOST_CHECK_FAILED'] @@ -1285,6 +1300,31 @@ you may want to run: self.cache.save_host(host) return None + def _deploy_etc_ceph_ceph_conf(self, host: str) -> Optional[str]: + ret, config, err = self.check_mon_command({ + "prefix": "config generate-minimal-conf", + }) + + try: + with self._remote_connection(host) as tpl: + conn, connr = tpl + out, err, code = remoto.process.check( + conn, + ['mkdir', '-p', '/etc/ceph']) + if code: + return f'failed to create /etc/ceph on {host}: {err}' + out, err, code = remoto.process.check( + conn, + ['dd', 'of=/etc/ceph/ceph.conf'], + stdin=config.encode('utf-8') + ) + if code: + return f'failed to create /etc/ceph/ceph.conf on {host}: {err}' + self.cache.remove_host_needs_new_etc_ceph_ceph_conf(host) + except OrchestratorError as e: + return f'failed to create /etc/ceph/ceph.conf on {host}: {str(e)}' + return None + @trivial_completion def describe_service(self, service_type=None, service_name=None, refresh=False): @@ -1430,7 +1470,7 @@ you may want to run: host, name, 'unit', ['--name', name, a]) except Exception: - self.log.exception('cephadm failed') + self.log.exception(f'`{host}: cephadm unit {name} {a}` failed') self.cache.invalidate_host_daemons(host) return "{} {} from host '{}'".format(action, name, host) diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index e80728c3871e0..15a0eeaf6cd16 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -1,6 +1,7 @@ import datetime import json from contextlib import contextmanager +from unittest.mock import ANY import pytest @@ -597,3 +598,25 @@ class TestCephadm(object): # code will blow up here triggering the BOOM! code, out, err = cephadm_module.check_host('test') assert err is None + + @mock.patch("cephadm.module.CephadmOrchestrator._get_connection") + @mock.patch("remoto.process.check") + def test_etc_ceph(self, _check, _get_connection, cephadm_module: CephadmOrchestrator): + _get_connection.return_value = mock.Mock(), mock.Mock() + _check.return_value = '{}', '', 0 + + with with_host(cephadm_module, 'test'): + assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') + + with with_host(cephadm_module, 'test'): + cephadm_module.set_module_option('manage_etc_ceph_ceph_conf', True) + cephadm_module.config_notify() + assert cephadm_module.manage_etc_ceph_ceph_conf == True + + cephadm_module._refresh_hosts_and_daemons() + _check.assert_called_with(ANY, ['dd', 'of=/etc/ceph/ceph.conf'], stdin=b'') + + assert not cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') + + cephadm_module.notify('mon_map', mock.MagicMock()) + assert cephadm_module.cache.host_needs_new_etc_ceph_ceph_conf('test') diff --git a/src/pybind/mgr/tests/__init__.py b/src/pybind/mgr/tests/__init__.py index 8c54c79e52606..46f6728225108 100644 --- a/src/pybind/mgr/tests/__init__.py +++ b/src/pybind/mgr/tests/__init__.py @@ -66,6 +66,7 @@ if 'UNITTEST' in os.environ: self._ceph_get_option = mock.MagicMock() self._ceph_get_context = mock.MagicMock() self._ceph_register_client = mock.MagicMock() + self._ceph_set_health_checks = mock.MagicMock() self._configure_logging = lambda *_: None self._unconfigure_logging = mock.MagicMock() self._ceph_log = mock.MagicMock() -- 2.39.5