git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: catch CancelledError in asyncio timeout handler 55620/head
author: Adam King <adking@redhat.com>
Fri, 16 Feb 2024 16:24:32 +0000 (11:24 -0500)
committer: Adam King <adking@redhat.com>
Mon, 26 Feb 2024 18:13:48 +0000 (13:13 -0500)
Specifically, this catches concurrent.futures.CancelledError. At least on
Python 3.9, this error can be raised when certain commands
being run asynchronously fail. Not catching it results in
the whole cephadm module crashing with a traceback like:

Traceback (most recent call last):
  File "/usr/share/ceph/mgr/cephadm/utils.py", line 94, in do_work
    return f(*arg)
  File "/usr/share/ceph/mgr/cephadm/serve.py", line 267, in refresh
    r = self._refresh_facts(host)
  File "/usr/share/ceph/mgr/cephadm/serve.py", line 370, in _refresh_facts
    val = self.mgr.wait_async(self._run_cephadm_json(
  File "/usr/share/ceph/mgr/cephadm/module.py", line 671, in wait_async
    return self.event_loop.get_result(coro, timeout)
  File "/usr/share/ceph/mgr/cephadm/ssh.py", line 64, in get_result
    return future.result(timeout)
  File "/lib64/python3.9/concurrent/futures/_base.py", line 444, in result
    raise CancelledError()
concurrent.futures._base.CancelledError

Fixes: https://tracker.ceph.com/issues/64473
Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py

index 87f7024bb2555510a7418f357634559a2bed0593..7d9aa02bade468e57fc16028d7283548b31067ad 100644 (file)
@@ -761,6 +761,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             else:
                 err_str += (f'(default {self.default_cephadm_command_timeout} second timeout)')
             raise OrchestratorError(err_str)
+        except concurrent.futures.CancelledError as e:
+            err_str = ''
+            if cmd:
+                err_str = f'Command "{cmd}" failed '
+            else:
+                err_str = 'Command failed '
+            if host:
+                err_str += f'on host {host} '
+            err_str += f' - {str(e)}'
+            raise OrchestratorError(err_str)
 
     def set_container_image(self, entity: str, image: str) -> None:
         self.check_mon_command({