From: Kefu Chai Date: Wed, 4 Mar 2026 13:10:28 +0000 (+0800) Subject: cephadm: fix rm-cluster when /etc/ceph/ceph.conf is a directory X-Git-Tag: v21.0.1~618^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F67621%2Fhead;p=ceph.git cephadm: fix rm-cluster when /etc/ceph/ceph.conf is a directory Fix IsADirectoryError in _rm_cluster() when /etc/ceph/ceph.conf exists as a directory instead of a file. The Error: ---------- During cluster cleanup, _rm_cluster() fails with: Traceback (most recent call last): File "/home/ubuntu/cephtest/cephadm", line 8634, in main() File "/home/ubuntu/cephtest/cephadm", line 8622, in main r = ctx.func(ctx) File "/home/ubuntu/cephtest/cephadm", line 6538, in command_rm_cluster with open(files[0]) as f: IsADirectoryError: [Errno 21] Is a directory: '/etc/ceph/ceph.conf' Root Cause: ----------- Container services (iSCSI, NFS, NVMe-oF) create bind mounts like: mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' Docker/Podman has a quirk: when bind mounting to a destination that doesn't exist, the container runtime creates the destination as a DIRECTORY, not a file. The Fix: -------- Use early returns and guard clauses to handle different states of /etc/ceph/ceph.conf: 1. If ceph.conf doesn't exist: return early (nothing to clean up) 2. If ceph.conf is an empty directory: remove it and return (leftover bind mount point from container) 3. If ceph.conf exists but is not a file: return early (unexpected state) 4. If ceph.conf is a file: validate fsid and remove all config files Also log a warning when the fsid in ceph.conf does not match the cluster being removed, so the user knows why config files are not being deleted. Includes unit tests covering all code paths. Fixes: https://tracker.ceph.com/issues/75275 Signed-off-by: Kefu Chai Co-authored-by: ADI2705 <2021bit038@sggs.ac.in> --- diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 4e255e55165..0c632cf80a0 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -4120,16 +4120,30 @@ def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None: # clean up config, keyring, and pub key files files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING] - if os.path.exists(files[0]): - valid_fsid = False - with open(files[0]) as f: - if ctx.fsid in f.read(): - valid_fsid = True - if valid_fsid: - # rm configuration files on /etc/ceph - for n in range(0, len(files)): - if os.path.exists(files[n]): - os.remove(files[n]) + if not os.path.exists(files[0]): + return + + if os.path.isdir(files[0]) and not os.listdir(files[0]): + # If ceph.conf is an empty directory, it's a leftover bind mount + # point from a container. Remove it and return - there's no config + # file to validate and no other files to clean up. + Path(files[0]).rmdir() + return + + if not os.path.isfile(files[0]): + return + + # Validate fsid from ceph.conf + with open(files[0]) as f: + if ctx.fsid not in f.read(): + logger.warning('Cluster fsid %s does not match %s, ' + 'leaving config files untouched', + ctx.fsid, files[0]) + return + + # rm configuration files on /etc/ceph + for f in files: + Path(f).unlink(missing_ok=True) ################################## diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index 4b0ecc6310b..7d2402d8b9b 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -3145,3 +3145,132 @@ class TestRescan(fake_filesystem_unittest.TestCase): self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='unknown') out = _cephadm.command_rescan_disks(self.ctx) assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed') + + +class TestRmClusterConfigCleanup(fake_filesystem_unittest.TestCase): + """Tests for config cleanup logic in _rm_cluster().""" + + TEST_FSID = '00000000-0000-0000-0000-0000deadbeef' + + def setUp(self): + self.setUpPyfakefs() + if not fake_filesystem.is_root(): + fake_filesystem.set_uid(0) + + # Create directories that _rm_cluster expects + self.fs.create_dir('/var/lib/ceph') + self.fs.create_dir('/var/log/ceph') + self.fs.create_dir('/run/cephadm') + self.fs.create_dir('/etc/logrotate.d') + self.fs.create_dir('/etc/systemd/system') + self.fs.create_dir('/etc/sysctl.d') + self.fs.create_dir('/etc/ceph') + + def _make_ctx(self): + ctx = _cephadm.CephadmContext() + ctx.fsid = self.TEST_FSID + ctx.data_dir = '/var/lib/ceph' + ctx.log_dir = '/var/log/ceph' + ctx.unit_dir = '/etc/systemd/system' + ctx.logrotate_dir = '/etc/logrotate.d' + ctx.sysctl_dir = '/etc/sysctl.d' + return ctx + + @mock.patch('cephadm.get_ceph_cluster_count', return_value=1) + @mock.patch('cephadm.daemons_summary', return_value=[]) + @mock.patch('cephadmlib.systemd.terminate_service') + @mock.patch('cephadm.terminate_service') + @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0)) + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.logger') + def test_config_file_matching_fsid( + self, _logger, _call, _call2, _term, _term2, _daemons, _count + ): + """ceph.conf with matching fsid - all config files should be removed.""" + conf_content = get_ceph_conf(fsid=self.TEST_FSID) + self.fs.create_file('/etc/ceph/ceph.conf', contents=conf_content) + self.fs.create_file('/etc/ceph/ceph.pub', contents='pubkey') + self.fs.create_file('/etc/ceph/ceph.client.admin.keyring', contents='keyring') + + ctx = self._make_ctx() + _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False) + + assert not os.path.exists('/etc/ceph/ceph.conf') + assert not os.path.exists('/etc/ceph/ceph.pub') + assert not os.path.exists('/etc/ceph/ceph.client.admin.keyring') + + @mock.patch('cephadm.get_ceph_cluster_count', return_value=1) + @mock.patch('cephadm.daemons_summary', return_value=[]) + @mock.patch('cephadmlib.systemd.terminate_service') + @mock.patch('cephadm.terminate_service') + @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0)) + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.logger') + def test_config_file_wrong_fsid( + self, _logger, _call, _call2, _term, _term2, _daemons, _count + ): + """ceph.conf with non-matching fsid - files should be preserved and warning logged.""" + conf_content = get_ceph_conf(fsid='aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee') + self.fs.create_file('/etc/ceph/ceph.conf', contents=conf_content) + self.fs.create_file('/etc/ceph/ceph.pub', contents='pubkey') + self.fs.create_file('/etc/ceph/ceph.client.admin.keyring', contents='keyring') + + ctx = self._make_ctx() + _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False) + + assert os.path.exists('/etc/ceph/ceph.conf') + assert os.path.exists('/etc/ceph/ceph.pub') + assert os.path.exists('/etc/ceph/ceph.client.admin.keyring') + _logger.warning.assert_called_once() + + @mock.patch('cephadm.get_ceph_cluster_count', return_value=1) + @mock.patch('cephadm.daemons_summary', return_value=[]) + @mock.patch('cephadmlib.systemd.terminate_service') + @mock.patch('cephadm.terminate_service') + @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0)) + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.logger') + def test_config_not_exists( + self, _logger, _call, _call2, _term, _term2, _daemons, _count + ): + """ceph.conf doesn't exist - should not raise any error.""" + ctx = self._make_ctx() + _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False) + + @mock.patch('cephadm.get_ceph_cluster_count', return_value=1) + @mock.patch('cephadm.daemons_summary', return_value=[]) + @mock.patch('cephadmlib.systemd.terminate_service') + @mock.patch('cephadm.terminate_service') + @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0)) + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.logger') + def test_config_empty_directory( + self, _logger, _call, _call2, _term, _term2, _daemons, _count + ): + """ceph.conf is an empty directory (bind mount leftover) - should be removed.""" + self.fs.create_dir('/etc/ceph/ceph.conf') + + ctx = self._make_ctx() + _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False) + + assert not os.path.exists('/etc/ceph/ceph.conf') + + @mock.patch('cephadm.get_ceph_cluster_count', return_value=1) + @mock.patch('cephadm.daemons_summary', return_value=[]) + @mock.patch('cephadmlib.systemd.terminate_service') + @mock.patch('cephadm.terminate_service') + @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0)) + @mock.patch('cephadm.call', return_value=('', '', 0)) + @mock.patch('cephadm.logger') + def test_config_nonempty_directory( + self, _logger, _call, _call2, _term, _term2, _daemons, _count + ): + """ceph.conf is a non-empty directory - should be left alone.""" + self.fs.create_dir('/etc/ceph/ceph.conf') + self.fs.create_file('/etc/ceph/ceph.conf/somefile', contents='data') + + ctx = self._make_ctx() + _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False) + + assert os.path.exists('/etc/ceph/ceph.conf') + assert os.path.isdir('/etc/ceph/ceph.conf')