Specifically, concurrent.futures.CancelledError. At least on
Python 3.9, this error can be raised when certain commands
being run asynchronously fail. Not catching it results in
the whole cephadm module crashing with a traceback like:
Traceback (most recent call last):
File "/usr/share/ceph/mgr/cephadm/utils.py", line 94, in do_work
return f(*arg)
File "/usr/share/ceph/mgr/cephadm/serve.py", line 267, in refresh
r = self._refresh_facts(host)
File "/usr/share/ceph/mgr/cephadm/serve.py", line 370, in _refresh_facts
val = self.mgr.wait_async(self._run_cephadm_json(
File "/usr/share/ceph/mgr/cephadm/module.py", line 671, in wait_async
return self.event_loop.get_result(coro, timeout)
File "/usr/share/ceph/mgr/cephadm/ssh.py", line 64, in get_result
return future.result(timeout)
File "/lib64/python3.9/concurrent/futures/_base.py", line 444, in result
raise CancelledError()
concurrent.futures._base.CancelledError
Fixes: https://tracker.ceph.com/issues/64473
Signed-off-by: Adam King <adking@redhat.com>
else:
err_str += (f'(default {self.default_cephadm_command_timeout} second timeout)')
raise OrchestratorError(err_str)
+ except concurrent.futures.CancelledError as e:
+ err_str = ''
+ if cmd:
+ err_str = f'Command "{cmd}" failed '
+ else:
+ err_str = 'Command failed '
+ if host:
+ err_str += f'on host {host} '
+ err_str += f' - {str(e)}'
+ raise OrchestratorError(err_str)
def set_container_image(self, entity: str, image: str) -> None:
self.check_mon_command({