From: Guillaume Abrioux Date: Wed, 10 Apr 2024 13:00:21 +0000 (+0200) Subject: cephadm: check if file exists when passing `--apply_spec` X-Git-Tag: v20.0.0~2048^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2ede1484925452c1ba717de0b9e8f9310c128bfb;p=ceph.git cephadm: check if file exists when passing `--apply_spec` cephadm deploys the cluster, fails and does a rollback. If the passed file doesn't exist we can make the CLI fail early instead. ``` ... omitted output ... Applying ../host-spec.yaml to cluster FileNotFoundError: [Errno 2] No such file or directory: '../host-spec.yaml' *************** Cephadm hit an issue during cluster installation. Current cluster files will be deleted automatically. To disable this behaviour you can pass the --no-cleanup-on-failure flag. In case of any previous broken installation, users must use the following command to completely delete the broken cluster: > cephadm rm-cluster --force --zap-osds --fsid for more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster *************** Deleting cluster with fsid: 6e6a2dbe-f73a-11ee-8262-98be948800fd Traceback (most recent call last): File "/usr/lib64/python3.9/runpy.py", line 197, in _run_module_as_main return _run_code(code, main_globals, None, File "/usr/lib64/python3.9/runpy.py", line 87, in _run_code exec(code, run_globals) File "/tmp/tmpive4g9gs.cephadm.build/app/__main__.py", line 5615, in File "/tmp/tmpive4g9gs.cephadm.build/app/__main__.py", line 5603, in main File "/tmp/tmpive4g9gs.cephadm.build/app/__main__.py", line 2693, in _rollback File "/tmp/tmpive4g9gs.cephadm.build/app/__main__.py", line 445, in _default_image File "/tmp/tmpive4g9gs.cephadm.build/app/__main__.py", line 2958, in command_bootstrap FileNotFoundError: [Errno 2] No such file or directory: '../host-spec.yaml' ``` Signed-off-by: Guillaume Abrioux --- diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 
6257fb11d13..8e2677d35ea 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -2695,8 +2695,9 @@ def rollback(func: FuncT) -> FuncT: # another cluster with the provided fsid already exists: don't remove. raise except (KeyboardInterrupt, Exception) as e: - logger.error(f'{type(e).__name__}: {e}') - if ctx.no_cleanup_on_failure: + # If ctx.fsid is None it would print a meaningless message suggesting + # running "cephadm rm-cluster --force --fsid None" + if ctx.no_cleanup_on_failure and ctx.fsid is not None: logger.info('\n\n' '\t***************\n' '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically. To change\n' @@ -2706,7 +2707,10 @@ def rollback(func: FuncT) -> FuncT: '\t > cephadm rm-cluster --force --zap-osds --fsid \n\n' '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n' '\t***************\n\n') - else: + if not ctx.no_cleanup_on_failure: + # The logger.error() used to be called before these conditions, which resulted in the error being printed twice. + # Moving it inside this condition to print the error if _rm_cluster() is called and also fails. + logger.error(f'{type(e).__name__}: {e}') logger.info('\n\n' '\t***************\n' '\tCephadm hit an issue during cluster installation. 
Current cluster files will be deleted automatically.\n' @@ -2734,6 +2738,13 @@ def command_bootstrap(ctx): if not ctx.output_pub_ssh_key: ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY) + if ctx.apply_spec and not os.path.exists(ctx.apply_spec): + # Given that nothing has been deployed at this point, setting `ctx.no_cleanup_on_failure = True` + # as there's no need to call _rm_cluster() which would generate the message: + # "ERROR: must select the cluster to delete by passing --fsid to proceed" + ctx.no_cleanup_on_failure = True + raise Error(f"--apply-spec has been specified but {ctx.apply_spec} doesn't exist.") + if ( (bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key)) and (bool(ctx.ssh_private_key) is not bool(ctx.ssh_signed_cert))