git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: catch CancelledError in asyncio timeout handler 56103/head
author: Adam King <adking@redhat.com>
Fri, 16 Feb 2024 16:24:32 +0000 (11:24 -0500)
committer: Adam King <adking@redhat.com>
Wed, 13 Mar 2024 14:12:25 +0000 (10:12 -0400)
Specifically, concurrent.futures.CancelledError. At least on
python 3.9, this error can be raised when certain commands
being run asynchronously fail. Not catching this results in
the whole cephadm module crashing with something like:

Traceback (most recent call last):
  File "/usr/share/ceph/mgr/cephadm/utils.py", line 94, in do_work
    return f(*arg)
  File "/usr/share/ceph/mgr/cephadm/serve.py", line 267, in refresh
    r = self._refresh_facts(host)
  File "/usr/share/ceph/mgr/cephadm/serve.py", line 370, in _refresh_facts
    val = self.mgr.wait_async(self._run_cephadm_json(
  File "/usr/share/ceph/mgr/cephadm/module.py", line 671, in wait_async
    return self.event_loop.get_result(coro, timeout)
  File "/usr/share/ceph/mgr/cephadm/ssh.py", line 64, in get_result
    return future.result(timeout)
  File "/lib64/python3.9/concurrent/futures/_base.py", line 444, in result
    raise CancelledError()
concurrent.futures._base.CancelledError

Fixes: https://tracker.ceph.com/issues/64473
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit 9c34973932bf3a0ec50c1c63bcba5e35bfe407e5)

src/pybind/mgr/cephadm/module.py

index bb23564d95fa9b62aa03f78f835d21aeaafb0e4a..e1a32000688bec0025bdc74cb70fcad949540723 100644 (file)
@@ -753,6 +753,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             else:
                 err_str += (f'(default {self.default_cephadm_command_timeout} second timeout)')
             raise OrchestratorError(err_str)
+        except concurrent.futures.CancelledError as e:
+            err_str = ''
+            if cmd:
+                err_str = f'Command "{cmd}" failed '
+            else:
+                err_str = 'Command failed '
+            if host:
+                err_str += f'on host {host} '
+            err_str += f' - {str(e)}'
+            raise OrchestratorError(err_str)
 
     def set_container_image(self, entity: str, image: str) -> None:
         self.check_mon_command({