git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
cephadm: added sysctl-dir command for tuned profiles, use remove-file for strays
author Shweta Bhosale <Shweta.Bhosale1@ibm.com>
Mon, 11 May 2026 06:12:15 +0000 (11:42 +0530)
committer Shweta Bhosale <Shweta.Bhosale1@ibm.com>
Thu, 11 Jun 2026 05:10:34 +0000 (10:40 +0530)
Added the cephadm sysctl-dir command with two mutually exclusive options:
--list (prints sorted basenames under /etc/sysctl.d) and --apply-system
(runs sysctl --system).
The cephadm mgr tuned profile logic now calls sysctl-dir for listing and
reload, and remove-file to delete stray *-cephadm-tuned-profile.conf files.

Fixes: https://tracker.ceph.com/issues/74045
Signed-off-by: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
doc/man/8/cephadm.rst
src/cephadm/cephadm.py
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_tuned_profiles.py
src/pybind/mgr/cephadm/tuned_profiles.py

index 1d352ded429accc7581d5495eb1eb87075aa11e9..35e5e8fe99fbcfddb8a9739fd9ffe336ccf18334 100644 (file)
@@ -13,7 +13,7 @@ Synopsis
 |             [--log-dir LOG_DIR] [--logrotate-dir LOGROTATE_DIR]
 |             [--unit-dir UNIT_DIR] [--verbose] [--timeout TIMEOUT]
 |             [--retry RETRY] [--no-container-init]
-|             {version,pull,inspect-image,ls,list-networks,list-rdma,adopt,rm-daemon,rm-cluster,remove-file,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,check-online,prepare-host,prepare-host-sudo-hardening,setup-ssh-user,add-repo,rm-repo,install,list-images,update-osd-service}
+|             {version,pull,inspect-image,ls,list-networks,list-rdma,adopt,rm-daemon,rm-cluster,remove-file,sysctl-dir,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,check-online,prepare-host,prepare-host-sudo-hardening,setup-ssh-user,add-repo,rm-repo,install,list-images,update-osd-service}
 |               ...
 
 
@@ -94,6 +94,8 @@ Synopsis
 
 | **cephadm** **remove-file** [-h] [--fsid FSID] --path PATH
 
+| **cephadm** **sysctl-dir** [-h] [--fsid FSID] (--list | --apply-system)
+
 | **cephadm** **prepare-host**
 
 | **cephadm** **add-repo** [-h] [--release RELEASE] [--version VERSION]
@@ -315,6 +317,18 @@ Arguments:
 * --path PATH     absolute path of the file to remove (required)
 
 
+sysctl-dir
+----------
+
+List basenames under ``/etc/sysctl.d`` or run ``sysctl --system`` on the local host.
+
+Arguments:
+
+* [--fsid FSID]      cluster FSID (passed automatically when invoked by the orchestrator)
+* --list             print one directory entry per line (sorted)
+* --apply-system     reload sysctl settings from all configuration paths
+
+
 deploy
 ------
 
index d49e96ea45be0fa3afa3b549ba24216af3e830f8..20f26e48d25a60654a91b19939141cc7418ce4c7 100755 (executable)
@@ -4666,6 +4666,25 @@ def command_remove_file(ctx: CephadmContext) -> int:
     return 0
 
 
+def command_sysctl_dir(ctx: CephadmContext) -> int:
+    """List basenames under sysctl.d or run sysctl --system"""
+    action = ctx.sysctl_dir_action
+    sysctl_dir = Path(SYSCTL_DIR)
+    if action == 'list':
+        if not sysctl_dir.is_dir():
+            raise Error(f'Not a directory: {SYSCTL_DIR}')
+        for name in sorted(p.name for p in sysctl_dir.iterdir()):
+            print(name)
+        return 0
+    if action == 'apply_system':
+        _out, _err, code = call(
+            ctx, ['sysctl', '--system'], verbosity=CallVerbosity.DEBUG)
+        if code:
+            raise Error(f'sysctl --system failed with code {code}: {_err}')
+        return 0
+    raise Error('sysctl-dir: no action specified')
+
+
 ##################################
 
 
@@ -5732,6 +5751,28 @@ def _get_parser():
         dest='remove_file_path',
         help='absolute path of the file to remove')
 
+    parser_sysctl_dir = subparsers.add_parser(
+        'sysctl-dir',
+        help='list entries in sysctl.d or run sysctl --system')
+    parser_sysctl_dir.set_defaults(func=command_sysctl_dir)
+    parser_sysctl_dir.add_argument(
+        '--fsid',
+        help='cluster FSID')
+    _sysctl_dir_action = parser_sysctl_dir.add_mutually_exclusive_group(
+        required=True)
+    _sysctl_dir_action.add_argument(
+        '--list',
+        dest='sysctl_dir_action',
+        action='store_const',
+        const='list',
+        help=f'print one basename per line from {SYSCTL_DIR}')
+    _sysctl_dir_action.add_argument(
+        '--apply-system',
+        dest='sysctl_dir_action',
+        action='store_const',
+        const='apply_system',
+        help='reload sysctl settings from all config paths (sysctl --system)')
+
     parser_maintenance = subparsers.add_parser(
         'host-maintenance', help='Manage the maintenance state of a host')
     parser_maintenance.add_argument(
index efba1cf777ebeda61bbfd26b0cc406e78e69af89..1644ceca0efdfd9ffb62ee5990f37145684e3a19 100644 (file)
@@ -144,6 +144,46 @@ class TestCephAdm(object):
                 _cephadm.command_remove_file(ctx)
         assert cephadm_fs.exists(link)
 
+    def test_command_sysctl_dir_list(self, cephadm_fs, capsys):
+        from cephadmlib.constants import SYSCTL_DIR
+        cephadm_fs.create_dir(SYSCTL_DIR)
+        cephadm_fs.create_file(os.path.join(SYSCTL_DIR, 'c.conf'))
+        cephadm_fs.create_file(os.path.join(SYSCTL_DIR, 'a.conf'))
+        with with_cephadm_ctx(
+            ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--list']
+        ) as ctx:
+            assert _cephadm.command_sysctl_dir(ctx) == 0
+        assert capsys.readouterr().out.splitlines() == ['a.conf', 'c.conf']
+
+    def test_command_sysctl_dir_list_missing_dir(self, cephadm_fs):
+        import shutil
+        from cephadmlib.constants import SYSCTL_DIR
+        # cephadm_fs already has /etc (e.g. from UNIT_DIR). Only sysctl.d must be absent.
+        if cephadm_fs.exists(SYSCTL_DIR):
+            shutil.rmtree(SYSCTL_DIR)
+        assert not cephadm_fs.exists(SYSCTL_DIR)
+        with with_cephadm_ctx(
+            ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--list']
+        ) as ctx:
+            with pytest.raises(_cephadm.Error, match='Not a directory'):
+                _cephadm.command_sysctl_dir(ctx)
+
+    def test_command_sysctl_dir_apply_system(self, cephadm_fs):
+        with with_cephadm_ctx(
+            ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--apply-system']
+        ) as ctx:
+            assert _cephadm.command_sysctl_dir(ctx) == 0
+
+    def test_command_sysctl_dir_apply_system_failure(self, cephadm_fs):
+        # Do not let with_cephadm_ctx re-patch cephadm.call back to success (exit 0).
+        with mock.patch('cephadm.call', return_value=('out', 'sysctl failed', 1)):
+            with with_cephadm_ctx(
+                ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--apply-system'],
+                mock_cephadm_call_fn=False,
+            ) as ctx:
+                with pytest.raises(_cephadm.Error, match='sysctl --system failed'):
+                    _cephadm.command_sysctl_dir(ctx)
+
     @mock.patch('cephadm.socket.socket.bind')
     @mock.patch('cephadm.logger')
     def test_port_in_use_special_cases(self, _logger, _bind):
index 9db971f6f216e4a851eb26204e574ef5a5aeef26..b1660d9e957477226ff2228ae2ee15b186bb1c80 100644 (file)
@@ -1,11 +1,14 @@
+import asyncio
 import pytest
 import json
+from contextlib import contextmanager
 from tests import mock
 from cephadm.tuned_profiles import TunedProfileUtils, SYSCTL_DIR
+from cephadm.utils import cephadmNoImage
 from cephadm.inventory import TunedProfileStore
 from ceph.utils import datetime_now
 from ceph.deployment.service_spec import TunedProfileSpec, PlacementSpec
-from cephadm.ssh import SSHManager, RemoteCommand, Executables
+from cephadm.ssh import SSHManager
 from orchestrator import HostSpec
 
 from typing import List, Dict
@@ -67,6 +70,13 @@ class FakeMgr:
         self.offline_hosts = []
         self.log_refresh_metadata = False
 
+    @contextmanager
+    def async_timeout_handler(self, _host, _msg):
+        yield
+
+    def wait_async(self, coro, timeout=None):
+        return asyncio.run(coro)
+
     def set_store(self, what: str, value: str):
         raise SaveError(f'{what}: {value}')
 
@@ -128,57 +138,48 @@ class TestTunedProfiles:
         ]
         _write_profiles.assert_has_calls(calls, any_order=True)
 
-    @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
-    def test_rm_stray_tuned_profiles(self, _check_execute_command):
+    @mock.patch('cephadm.tuned_profiles.CephadmServe._run_cephadm', new_callable=mock.AsyncMock)
+    @mock.patch('cephadm.tuned_profiles.TunedProfileUtils._sysctl_dir_apply_system')
+    @mock.patch('cephadm.tuned_profiles.TunedProfileUtils._sysctl_dir_list')
+    def test_rm_stray_tuned_profiles(
+            self, _sysctl_dir_list, _sysctl_dir_apply_system, _run_cephadm):
         profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
         # for this test, going to use host "a" and put 4 cephadm generated
         # profiles "p1" "p2", "p3" and "who" only two of which should be there ("p1", "p2")
         # as well as a file not generated by cephadm. Only the "p3" and "who"
-        # profiles should be removed from the host. This should total to 4
-        # calls to check_execute_command, 1 "ls", 2 "rm", and 1 "sysctl --system"
-        _check_execute_command.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf',
-                                                         'p2-cephadm-tuned-profile.conf',
-                                                         'p3-cephadm-tuned-profile.conf',
-                                                         'who-cephadm-tuned-profile.conf',
-                                                         'dont-touch-me'])
+        # profiles should be removed via cephadm remove-file. List/apply use cephadm.
+        _sysctl_dir_list.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf',
+                                                   'p2-cephadm-tuned-profile.conf',
+                                                   'p3-cephadm-tuned-profile.conf',
+                                                   'who-cephadm-tuned-profile.conf',
+                                                   'dont-touch-me'])
+        _run_cephadm.return_value = ([''], [''], 0)
         mgr = FakeMgr(['a', 'b', 'c'],
                       ['a', 'b', 'c'],
                       [],
                       profiles)
         tp = TunedProfileUtils(mgr)
         tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
-        calls = [
-            mock.call(
-                'a', RemoteCommand(Executables.LS, [SYSCTL_DIR]), log_command=False
-            ),
-            mock.call(
-                'a',
-                RemoteCommand(
-                    Executables.RM,
-                    ['-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']
-                )
-            ),
+        _sysctl_dir_list.assert_called_once_with('a')
+        _sysctl_dir_apply_system.assert_called_once_with('a')
+        rm_calls = [
             mock.call(
-                'a',
-                RemoteCommand(
-                    Executables.RM,
-                    ['-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']
-                )
-            ),
+                'a', cephadmNoImage, 'remove-file',
+                ['--path', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
             mock.call(
-                'a', RemoteCommand(Executables.SYSCTL, ['--system'])
-            ),
+                'a', cephadmNoImage, 'remove-file',
+                ['--path', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
         ]
-        _check_execute_command.assert_has_calls(calls, any_order=True)
+        _run_cephadm.assert_has_calls(rm_calls, any_order=True)
+        assert _run_cephadm.call_count == 2
 
-    @mock.patch('cephadm.ssh.SSHManager.check_execute_command')
+    @mock.patch('cephadm.tuned_profiles.TunedProfileUtils._sysctl_dir_apply_system')
     @mock.patch('cephadm.ssh.SSHManager.write_remote_file')
-    def test_write_tuned_profiles(self, _write_remote_file, _check_execute_command):
+    def test_write_tuned_profiles(self, _write_remote_file, _sysctl_dir_apply_system):
         profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3}
         # for this test we will use host "a" and have it so host_needs_tuned_profile_update
         # returns True for p2 and False for p1 (see FakeCache class). So we should see
-        # 2 ssh calls, one to write p2, one to run sysctl --system
-        _check_execute_command.return_value = 'success'
+        # one write for p2 and sysctl-dir --apply-system via cephadm.
         _write_remote_file.return_value = 'success'
         mgr = FakeMgr(['a', 'b', 'c'],
                       ['a', 'b', 'c'],
@@ -186,9 +187,7 @@ class TestTunedProfiles:
                       profiles)
         tp = TunedProfileUtils(mgr)
         tp._write_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
-        _check_execute_command.assert_called_with(
-            'a', RemoteCommand(Executables.SYSCTL, ['--system'])
-        )
+        _sysctl_dir_apply_system.assert_called_once_with('a')
         _write_remote_file.assert_called_with(
             'a', f'{SYSCTL_DIR}/p2-cephadm-tuned-profile.conf', tp._profile_to_str(self.tspec2).encode('utf-8'))
 
index 7a37d9379044b6a73db7965e40274225b2950002..d3f07342bebbfcda66e67bd57d6189b3e7ded977 100644 (file)
@@ -3,7 +3,8 @@ from typing import Dict, List, TYPE_CHECKING
 from ceph.utils import datetime_now
 from .schedule import HostAssignment
 from ceph.deployment.service_spec import ServiceSpec, TunedProfileSpec
-from . import ssh
+from .serve import CephadmServe
+from .utils import cephadmNoImage
 
 if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
@@ -12,13 +13,33 @@ logger = logging.getLogger(__name__)
 
 SYSCTL_DIR = '/etc/sysctl.d'
 
-SYSCTL_SYSTEM_CMD = ssh.RemoteCommand(ssh.Executables.SYSCTL, ['--system'])
+SYSCTL_DIR_CEPHADM_CMD = 'sysctl-dir'
 
 
 class TunedProfileUtils():
     def __init__(self, mgr: "CephadmOrchestrator") -> None:
         self.mgr = mgr
 
+    def _sysctl_dir_list(self, host: str) -> str:
+        with self.mgr.async_timeout_handler(host, 'cephadm sysctl-dir --list'):
+            out, _err, _code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm(
+                host,
+                cephadmNoImage,
+                SYSCTL_DIR_CEPHADM_CMD,
+                ['--list'],
+                log_output=self.mgr.log_refresh_metadata,
+            ))
+        return ''.join(out)
+
+    def _sysctl_dir_apply_system(self, host: str) -> None:
+        with self.mgr.async_timeout_handler(host, 'cephadm sysctl-dir --apply-system'):
+            self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm(
+                host,
+                cephadmNoImage,
+                SYSCTL_DIR_CEPHADM_CMD,
+                ['--apply-system'],
+            ))
+
     def _profile_to_str(self, p: TunedProfileSpec) -> str:
         p_str = f'# created by cephadm\n# tuned profile "{p.profile_name}"\n\n'
         for k, v in p.settings.items():
@@ -72,8 +93,7 @@ class TunedProfileUtils():
         """
         if self.mgr.cache.is_host_unreachable(host):
             return
-        cmd = ssh.RemoteCommand(ssh.Executables.LS, [SYSCTL_DIR])
-        found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n')
+        found_files = self._sysctl_dir_list(host).split('\n')
         found_files = [s.strip() for s in found_files]
         profile_names: List[str] = sum([[*p] for p in profiles], [])  # extract all profiles names
         profile_names = list(set(profile_names))  # remove duplicates
@@ -84,11 +104,13 @@ class TunedProfileUtils():
                 continue
             if file not in expected_files:
                 logger.info(f'Removing stray tuned profile file {file}')
-                cmd = ssh.RemoteCommand(ssh.Executables.RM, ['-f', f'{SYSCTL_DIR}/{file}'])
-                self.mgr.ssh.check_execute_command(host, cmd)
+                path = f'{SYSCTL_DIR}/{file}'
+                with self.mgr.async_timeout_handler(host, f'cephadm remove-file ({path})'):
+                    self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm(
+                        host, cephadmNoImage, 'remove-file', ['--path', path]))
                 updated = True
         if updated:
-            self.mgr.ssh.check_execute_command(host, SYSCTL_SYSTEM_CMD)
+            self._sysctl_dir_apply_system(host)
 
     def _write_tuned_profiles(self, host: str, profiles: List[Dict[str, str]]) -> None:
         if self.mgr.cache.is_host_unreachable(host):
@@ -102,5 +124,5 @@ class TunedProfileUtils():
                     self.mgr.ssh.write_remote_file(host, profile_filename, content.encode('utf-8'))
                     updated = True
         if updated:
-            self.mgr.ssh.check_execute_command(host, SYSCTL_SYSTEM_CMD)
+            self._sysctl_dir_apply_system(host)
         self.mgr.cache.last_tuned_profile_update[host] = datetime_now()