From: Adam King
Date: Fri, 16 Feb 2024 16:24:32 +0000 (-0500)
Subject: mgr/cephadm: catch CancelledError in asyncio timeout handler
X-Git-Tag: v20.0.0~2516^2
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F55620%2Fhead;p=ceph.git

mgr/cephadm: catch CancelledError in asyncio timeout handler

Specifically, concurrent.futures.CancelledError. At least on
python 3.9, this error can be raised when certain commands
being run asynchronously fail. Not catching this results in the
whole cephadm module crashing with something like

Traceback (most recent call last):
  File "/usr/share/ceph/mgr/cephadm/utils.py", line 94, in do_work
    return f(*arg)
  File "/usr/share/ceph/mgr/cephadm/serve.py", line 267, in refresh
    r = self._refresh_facts(host)
  File "/usr/share/ceph/mgr/cephadm/serve.py", line 370, in _refresh_facts
    val = self.mgr.wait_async(self._run_cephadm_json(
  File "/usr/share/ceph/mgr/cephadm/module.py", line 671, in wait_async
    return self.event_loop.get_result(coro, timeout)
  File "/usr/share/ceph/mgr/cephadm/ssh.py", line 64, in get_result
    return future.result(timeout)
  File "/lib64/python3.9/concurrent/futures/_base.py", line 444, in result
    raise CancelledError()
concurrent.futures._base.CancelledError

Fixes: https://tracker.ceph.com/issues/64473

Signed-off-by: Adam King
---

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 87f7024bb2555..7d9aa02bade46 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -761,6 +761,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             else:
                 err_str += (f'(default {self.default_cephadm_command_timeout} second timeout)')
             raise OrchestratorError(err_str)
+        except concurrent.futures.CancelledError as e:
+            err_str = ''
+            if cmd:
+                err_str = f'Command "{cmd}" failed '
+            else:
+                err_str = 'Command failed '
+            if host:
+                err_str += f'on host {host} '
+            err_str += f' - {str(e)}'
+            raise OrchestratorError(err_str)
 
     def set_container_image(self, entity: str, image: str) -> None:
         self.check_mon_command({