From 7189575404262c0fbb4717f824c022d48ab58fcd Mon Sep 17 00:00:00 2001
From: Sebastian Wagner
Date: Tue, 4 Aug 2020 14:01:49 +0200
Subject: [PATCH] mgr/cephadm: Make `ceph orch * --refresh` asynchronous

There is just no way we can allow those calls to be synchronous:

* All CLI commands are blocked until the refresh completes.
* ^C doesn't help at all, as it just kills the client.

Therefore, refreshes have to be scheduled in the background.

Fixes: https://tracker.ceph.com/issues/46813
Signed-off-by: Sebastian Wagner
---
 src/pybind/mgr/cephadm/module.py             | 57 ++++++++++++--------
 src/pybind/mgr/cephadm/tests/test_cephadm.py |  9 ++--
 2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 576932c17de..631fc406ee0 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -1449,15 +1449,25 @@ you may want to run:
             return f'failed to create /etc/ceph/ceph.conf on {host}: {str(e)}'
         return None
 
+    def _invalidate_daemons_and_kick_serve(self, filter_host=None):
+        if filter_host:
+            self.cache.invalidate_host_daemons(filter_host)
+        else:
+            for h in self.cache.get_hosts():
+                # Also discover daemons deployed manually
+                self.cache.invalidate_host_daemons(h)
+
+        self._kick_serve_loop()
+
     @trivial_completion
-    def describe_service(self, service_type=None, service_name=None,
-                         refresh=False) -> List[orchestrator.ServiceDescription]:
+    def describe_service(self, service_type: Optional[str]=None, service_name: Optional[str]=None,
+                         refresh: bool=False) -> List[orchestrator.ServiceDescription]:
         if refresh:
-            # ugly sync path, FIXME someday perhaps?
-            for host in self.inventory.keys():
-                self._refresh_host_daemons(host)
+            self._invalidate_daemons_and_kick_serve()
+            self.log.info('Kicked serve() loop to refresh all services')
+
         #
-        sm = {}  # type: Dict[str, orchestrator.ServiceDescription]
+        sm: Dict[str, orchestrator.ServiceDescription] = {}
         osd_count = 0
         for h, dm in self.cache.get_daemons_with_volatile_status():
             for name, dd in dm.items():
@@ -1538,15 +1548,16 @@ you may want to run:
         return list(sm.values())
 
     @trivial_completion
-    def list_daemons(self, service_name=None, daemon_type=None, daemon_id=None,
-                     host=None, refresh=False) -> List[orchestrator.DaemonDescription]:
+    def list_daemons(self,
+                     service_name: Optional[str] = None,
+                     daemon_type: Optional[str] = None,
+                     daemon_id: Optional[str] = None,
+                     host: Optional[str] = None,
+                     refresh: bool = False) -> List[orchestrator.DaemonDescription]:
         if refresh:
-            # ugly sync path, FIXME someday perhaps?
-            if host:
-                self._refresh_host_daemons(host)
-            else:
-                for hostname in self.inventory.keys():
-                    self._refresh_host_daemons(hostname)
+            self._invalidate_daemons_and_kick_serve(host)
+            self.log.info('Kicked serve() loop to refresh all daemons')
+
         result = []
         for h, dm in self.cache.get_daemons_with_volatile_status():
             if host and h != host:
@@ -1652,7 +1663,7 @@ you may want to run:
         return f'Failed to remove service. <{service_name}> was not found.'
 
     @trivial_completion
-    def get_inventory(self, host_filter=None, refresh=False) -> List[orchestrator.InventoryHost]:
+    def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter]=None, refresh=False) -> List[orchestrator.InventoryHost]:
         """
         Return the storage inventory of hosts matching the given filter.
 
@@ -1662,17 +1673,19 @@ you may want to run:
         TODO:
         - add filtering by label
         """
         if refresh:
-            # ugly sync path, FIXME someday perhaps?
-            if host_filter:
-                for host in host_filter.hosts:
-                    self._refresh_host_devices(host)
+            if host_filter and host_filter.hosts:
+                for h in host_filter.hosts:
+                    self.cache.invalidate_host_devices(h)
             else:
-                for host in self.inventory.keys():
-                    self._refresh_host_devices(host)
+                for h in self.cache.get_hosts():
+                    self.cache.invalidate_host_devices(h)
+
+            self.event.set()
+            self.log.info('Kicked serve() loop to refresh devices')
         result = []
         for host, dls in self.cache.devices.items():
-            if host_filter and host not in host_filter.hosts:
+            if host_filter and host_filter.hosts and host not in host_filter.hosts:
                 continue
             result.append(orchestrator.InventoryHost(host, inventory.Devices(dls)))
         return result
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 500587262d0..aa700e80ffc 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -196,7 +196,8 @@ class TestCephadm(object):
     def test_list_daemons(self, cephadm_module: CephadmOrchestrator):
         cephadm_module.service_cache_timeout = 10
         with with_host(cephadm_module, 'test'):
-            c = cephadm_module.list_daemons(refresh=True)
+            cephadm_module._refresh_host_daemons('test')
+            c = cephadm_module.list_daemons()
             assert wait(cephadm_module, c)[0].name() == 'rgw.myrgw.foobar'
 
     @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('[]'))
@@ -443,7 +444,8 @@ class TestCephadm(object):
     @mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0)
     def test_remove_osds(self, cephadm_module):
         with with_host(cephadm_module, 'test'):
-            c = cephadm_module.list_daemons(refresh=True)
+            cephadm_module._refresh_host_daemons('test')
+            c = cephadm_module.list_daemons()
             wait(cephadm_module, c)
 
             c = cephadm_module.remove_daemons(['osd.0'])
@@ -496,7 +498,8 @@ class TestCephadm(object):
     ))
     def test_remove_daemon(self, cephadm_module):
        with with_host(cephadm_module, 'test'):
-            c = cephadm_module.list_daemons(refresh=True)
+            cephadm_module._refresh_host_daemons('test')
+            c = cephadm_module.list_daemons()
             wait(cephadm_module, c)
             c = cephadm_module.remove_daemons(['rgw.myrgw.myhost.myid'])
             out = wait(cephadm_module, c)
-- 
2.39.5
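
P.S.: the pattern behind this change, in isolation: instead of doing the slow
per-host refresh inside the CLI handler, the handler merely invalidates the
cached state and wakes the background serve() thread via a threading.Event
(which is what _kick_serve_loop() and self.event.set() boil down to). Below is
a minimal, self-contained sketch of that pattern; ServeLoopKicker and its
members are hypothetical names for illustration, not cephadm's actual API.

import threading


class ServeLoopKicker:
    """Sketch of the 'invalidate caches, then kick serve()' pattern."""

    def __init__(self):
        self.stale_hosts = set()    # hosts whose cached state is invalid
        self.lock = threading.Lock()
        self.event = threading.Event()
        self.thread = threading.Thread(target=self.serve, daemon=True)
        self.thread.start()

    def serve(self):
        # Background loop: sleep until kicked (or until the periodic
        # timeout fires), then refresh whatever was marked stale.
        while True:
            self.event.wait(timeout=60)
            self.event.clear()
            with self.lock:
                todo, self.stale_hosts = self.stale_hosts, set()
            for host in todo:
                self.refresh_host(host)

    def refresh_host(self, host):
        # Placeholder for the slow part, e.g. an ssh round-trip that
        # lists the daemons or devices on the host.
        pass

    def kick(self, hosts):
        # Called from the CLI handler thread: cheap and non-blocking,
        # so the command returns immediately and ^C only affects the
        # client, never a half-finished refresh.
        with self.lock:
            self.stale_hosts.update(hosts)
        self.event.set()

With this shape, kick() returns immediately, so the CLI command is never stuck
behind a remote round-trip, and interrupting the client cannot orphan a
half-finished foreground refresh; the serve thread picks up the stale hosts on
its next wake-up, much like list_daemons(refresh=True) above now only
invalidates and kicks.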