]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
cephadm: fix rm-cluster when /etc/ceph/ceph.conf is a directory 67621/head
authorKefu Chai <k.chai@proxmox.com>
Wed, 4 Mar 2026 13:10:28 +0000 (21:10 +0800)
committerKefu Chai <k.chai@proxmox.com>
Wed, 4 Mar 2026 13:12:41 +0000 (21:12 +0800)
Fix IsADirectoryError in _rm_cluster() when /etc/ceph/ceph.conf exists
as a directory instead of a file.

The Error:
----------
During cluster cleanup, _rm_cluster() fails with:

  Traceback (most recent call last):
    File "/home/ubuntu/cephtest/cephadm", line 8634, in <module>
      main()
    File "/home/ubuntu/cephtest/cephadm", line 8622, in main
      r = ctx.func(ctx)
    File "/home/ubuntu/cephtest/cephadm", line 6538, in command_rm_cluster
      with open(files[0]) as f:
  IsADirectoryError: [Errno 21] Is a directory: '/etc/ceph/ceph.conf'

Root Cause:
-----------
Container services (iSCSI, NFS, NVMe-oF) create bind mounts like:
  mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'

Docker/Podman has a quirk: when bind mounting to a destination that
doesn't exist, the container runtime creates the destination as a
DIRECTORY, not a file.

The Fix:
--------
Use early returns and guard clauses to handle different states of
/etc/ceph/ceph.conf:

1. If ceph.conf doesn't exist: return early (nothing to clean up)
2. If ceph.conf is an empty directory: remove it and return (leftover
   bind mount point from container)
3. If ceph.conf exists but is not a file: return early (unexpected state)
4. If ceph.conf is a file: validate fsid and remove all config files

Also log a warning when the fsid in ceph.conf does not match the cluster
being removed, so the user knows why config files are not being deleted.

Includes unit tests covering all code paths.

Fixes: https://tracker.ceph.com/issues/75275
Signed-off-by: Kefu Chai <kchai@redhat.com>
Co-authored-by: ADI2705 <2021bit038@sggs.ac.in>
src/cephadm/cephadm.py
src/cephadm/tests/test_cephadm.py

index 4e255e551656bf9e793c3bfa7916937c0730adf9..0c632cf80a0b695d89ec56a002137d35e84298d6 100755 (executable)
@@ -4120,16 +4120,30 @@ def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None:
 
     # clean up config, keyring, and pub key files
     files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
-    if os.path.exists(files[0]):
-        valid_fsid = False
-        with open(files[0]) as f:
-            if ctx.fsid in f.read():
-                valid_fsid = True
-        if valid_fsid:
-            # rm configuration files on /etc/ceph
-            for n in range(0, len(files)):
-                if os.path.exists(files[n]):
-                    os.remove(files[n])
+    if not os.path.exists(files[0]):
+        return
+
+    if os.path.isdir(files[0]) and not os.listdir(files[0]):
+        # If ceph.conf is an empty directory, it's a leftover bind mount
+        # point from a container. Remove it and return - there's no config
+        # file to validate and no other files to clean up.
+        Path(files[0]).rmdir()
+        return
+
+    if not os.path.isfile(files[0]):
+        return
+
+    # Validate fsid from ceph.conf
+    with open(files[0]) as f:
+        if ctx.fsid not in f.read():
+            logger.warning('Cluster fsid %s does not match %s, '
+                           'leaving config files untouched',
+                           ctx.fsid, files[0])
+            return
+
+    # rm configuration files on /etc/ceph
+    for f in files:
+        Path(f).unlink(missing_ok=True)
 
 ##################################
 
index 4b0ecc6310b7dfb6c1544c056fb3e603c0ce1bf9..7d2402d8b9b30ed0f0001f5353638d4dd2af023a 100644 (file)
@@ -3145,3 +3145,132 @@ class TestRescan(fake_filesystem_unittest.TestCase):
         self.fs.create_file('/sys/class/scsi_host/host1/proc_name', contents='unknown')
         out = _cephadm.command_rescan_disks(self.ctx)
         assert out.startswith('Ok. 2 adapters detected: 1 rescanned, 1 skipped, 0 failed')
+
+
+class TestRmClusterConfigCleanup(fake_filesystem_unittest.TestCase):
+    """Tests for config cleanup logic in _rm_cluster()."""
+
+    TEST_FSID = '00000000-0000-0000-0000-0000deadbeef'
+
+    def setUp(self):
+        self.setUpPyfakefs()
+        if not fake_filesystem.is_root():
+            fake_filesystem.set_uid(0)
+
+        # Create directories that _rm_cluster expects
+        self.fs.create_dir('/var/lib/ceph')
+        self.fs.create_dir('/var/log/ceph')
+        self.fs.create_dir('/run/cephadm')
+        self.fs.create_dir('/etc/logrotate.d')
+        self.fs.create_dir('/etc/systemd/system')
+        self.fs.create_dir('/etc/sysctl.d')
+        self.fs.create_dir('/etc/ceph')
+
+    def _make_ctx(self):
+        ctx = _cephadm.CephadmContext()
+        ctx.fsid = self.TEST_FSID
+        ctx.data_dir = '/var/lib/ceph'
+        ctx.log_dir = '/var/log/ceph'
+        ctx.unit_dir = '/etc/systemd/system'
+        ctx.logrotate_dir = '/etc/logrotate.d'
+        ctx.sysctl_dir = '/etc/sysctl.d'
+        return ctx
+
+    @mock.patch('cephadm.get_ceph_cluster_count', return_value=1)
+    @mock.patch('cephadm.daemons_summary', return_value=[])
+    @mock.patch('cephadmlib.systemd.terminate_service')
+    @mock.patch('cephadm.terminate_service')
+    @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0))
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.logger')
+    def test_config_file_matching_fsid(
+        self, _logger, _call, _call2, _term, _term2, _daemons, _count
+    ):
+        """ceph.conf with matching fsid - all config files should be removed."""
+        conf_content = get_ceph_conf(fsid=self.TEST_FSID)
+        self.fs.create_file('/etc/ceph/ceph.conf', contents=conf_content)
+        self.fs.create_file('/etc/ceph/ceph.pub', contents='pubkey')
+        self.fs.create_file('/etc/ceph/ceph.client.admin.keyring', contents='keyring')
+
+        ctx = self._make_ctx()
+        _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False)
+
+        assert not os.path.exists('/etc/ceph/ceph.conf')
+        assert not os.path.exists('/etc/ceph/ceph.pub')
+        assert not os.path.exists('/etc/ceph/ceph.client.admin.keyring')
+
+    @mock.patch('cephadm.get_ceph_cluster_count', return_value=1)
+    @mock.patch('cephadm.daemons_summary', return_value=[])
+    @mock.patch('cephadmlib.systemd.terminate_service')
+    @mock.patch('cephadm.terminate_service')
+    @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0))
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.logger')
+    def test_config_file_wrong_fsid(
+        self, _logger, _call, _call2, _term, _term2, _daemons, _count
+    ):
+        """ceph.conf with non-matching fsid - files should be preserved and warning logged."""
+        conf_content = get_ceph_conf(fsid='aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
+        self.fs.create_file('/etc/ceph/ceph.conf', contents=conf_content)
+        self.fs.create_file('/etc/ceph/ceph.pub', contents='pubkey')
+        self.fs.create_file('/etc/ceph/ceph.client.admin.keyring', contents='keyring')
+
+        ctx = self._make_ctx()
+        _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False)
+
+        assert os.path.exists('/etc/ceph/ceph.conf')
+        assert os.path.exists('/etc/ceph/ceph.pub')
+        assert os.path.exists('/etc/ceph/ceph.client.admin.keyring')
+        _logger.warning.assert_called_once()
+
+    @mock.patch('cephadm.get_ceph_cluster_count', return_value=1)
+    @mock.patch('cephadm.daemons_summary', return_value=[])
+    @mock.patch('cephadmlib.systemd.terminate_service')
+    @mock.patch('cephadm.terminate_service')
+    @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0))
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.logger')
+    def test_config_not_exists(
+        self, _logger, _call, _call2, _term, _term2, _daemons, _count
+    ):
+        """ceph.conf doesn't exist - should not raise any error."""
+        ctx = self._make_ctx()
+        _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False)
+
+    @mock.patch('cephadm.get_ceph_cluster_count', return_value=1)
+    @mock.patch('cephadm.daemons_summary', return_value=[])
+    @mock.patch('cephadmlib.systemd.terminate_service')
+    @mock.patch('cephadm.terminate_service')
+    @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0))
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.logger')
+    def test_config_empty_directory(
+        self, _logger, _call, _call2, _term, _term2, _daemons, _count
+    ):
+        """ceph.conf is an empty directory (bind mount leftover) - should be removed."""
+        self.fs.create_dir('/etc/ceph/ceph.conf')
+
+        ctx = self._make_ctx()
+        _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False)
+
+        assert not os.path.exists('/etc/ceph/ceph.conf')
+
+    @mock.patch('cephadm.get_ceph_cluster_count', return_value=1)
+    @mock.patch('cephadm.daemons_summary', return_value=[])
+    @mock.patch('cephadmlib.systemd.terminate_service')
+    @mock.patch('cephadm.terminate_service')
+    @mock.patch('cephadmlib.call_wrappers.call', return_value=('', '', 0))
+    @mock.patch('cephadm.call', return_value=('', '', 0))
+    @mock.patch('cephadm.logger')
+    def test_config_nonempty_directory(
+        self, _logger, _call, _call2, _term, _term2, _daemons, _count
+    ):
+        """ceph.conf is a non-empty directory - should be left alone."""
+        self.fs.create_dir('/etc/ceph/ceph.conf')
+        self.fs.create_file('/etc/ceph/ceph.conf/somefile', contents='data')
+
+        ctx = self._make_ctx()
+        _cephadm._rm_cluster(ctx, keep_logs=True, zap_osds=False)
+
+        assert os.path.exists('/etc/ceph/ceph.conf')
+        assert os.path.isdir('/etc/ceph/ceph.conf')