From: Shweta Bhosale Date: Mon, 11 May 2026 06:12:15 +0000 (+0530) Subject: cephadm: added sysctl-dir command for tuned profiles, use remove-file for strays X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=56895ee07b9cf9d23ae46d7d71e7316cd3320227;p=ceph.git cephadm: added sysctl-dir command for tuned profiles, use remove-file for strays Added cephadm sysctl-dir with mutually exclusive --list (sorted basenames under /etc/sysctl.d) and --apply-system (sysctl --system). The cephadm mgr tuned profile logic calls sysctl-dir for listing and reload, and remove-file for stray *-cephadm-tuned-profile.conf files Fixes: https://tracker.ceph.com/issues/74045 Signed-off-by: Shweta Bhosale --- diff --git a/doc/man/8/cephadm.rst b/doc/man/8/cephadm.rst index 1d352ded429..35e5e8fe99f 100644 --- a/doc/man/8/cephadm.rst +++ b/doc/man/8/cephadm.rst @@ -13,7 +13,7 @@ Synopsis | [--log-dir LOG_DIR] [--logrotate-dir LOGROTATE_DIR] | [--unit-dir UNIT_DIR] [--verbose] [--timeout TIMEOUT] | [--retry RETRY] [--no-container-init] -| {version,pull,inspect-image,ls,list-networks,list-rdma,adopt,rm-daemon,rm-cluster,remove-file,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,check-online,prepare-host,prepare-host-sudo-hardening,setup-ssh-user,add-repo,rm-repo,install,list-images,update-osd-service} +| {version,pull,inspect-image,ls,list-networks,list-rdma,adopt,rm-daemon,rm-cluster,remove-file,sysctl-dir,run,shell,enter,ceph-volume,unit,logs,bootstrap,deploy,check-host,check-online,prepare-host,prepare-host-sudo-hardening,setup-ssh-user,add-repo,rm-repo,install,list-images,update-osd-service} | ... @@ -94,6 +94,8 @@ Synopsis | **cephadm** **remove-file** [-h] [--fsid FSID] --path PATH +| **cephadm** **sysctl-dir** [-h] [--fsid FSID] (--list | --apply-system) + | **cephadm** **prepare-host** | **cephadm** **add-repo** [-h] [--release RELEASE] [--version VERSION] @@ -315,6 +317,18 @@ Arguments: * --path PATH absolute path of the file to remove (required) +sysctl-dir +---------- + +List basenames under ``/etc/sysctl.d`` or run ``sysctl --system`` on the local host. + +Arguments: + +* [--fsid FSID] cluster FSID (passed automatically when invoked by the orchestrator) +* --list print one directory entry per line (sorted) +* --apply-system reload sysctl settings from all configuration paths + + deploy ------ diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index d49e96ea45b..20f26e48d25 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -4666,6 +4666,25 @@ def command_remove_file(ctx: CephadmContext) -> int: return 0 +def command_sysctl_dir(ctx: CephadmContext) -> int: + """List basenames under sysctl.d or run sysctl --system""" + action = ctx.sysctl_dir_action + sysctl_dir = Path(SYSCTL_DIR) + if action == 'list': + if not sysctl_dir.is_dir(): + raise Error(f'Not a directory: {SYSCTL_DIR}') + for name in sorted(p.name for p in sysctl_dir.iterdir()): + print(name) + return 0 + if action == 'apply_system': + _out, _err, code = call( + ctx, ['sysctl', '--system'], verbosity=CallVerbosity.DEBUG) + if code: + raise Error(f'sysctl --system failed with code {code}: {_err}') + return 0 + raise Error('sysctl-dir: no action specified') + + ################################## @@ -5732,6 +5751,28 @@ def _get_parser(): dest='remove_file_path', help='absolute path of the file to remove') + parser_sysctl_dir = subparsers.add_parser( + 'sysctl-dir', + help='list entries in sysctl.d or run sysctl --system') + parser_sysctl_dir.set_defaults(func=command_sysctl_dir) + parser_sysctl_dir.add_argument( + '--fsid', + help='cluster FSID') + _sysctl_dir_action = parser_sysctl_dir.add_mutually_exclusive_group( + required=True) + _sysctl_dir_action.add_argument( + '--list', + dest='sysctl_dir_action', + action='store_const', + const='list', + help=f'print one basename per line from {SYSCTL_DIR}') + _sysctl_dir_action.add_argument( + '--apply-system', + dest='sysctl_dir_action', + action='store_const', + const='apply_system', + help='reload sysctl settings from all config paths (sysctl --system)') + parser_maintenance = subparsers.add_parser( 'host-maintenance', help='Manage the maintenance state of a host') parser_maintenance.add_argument( diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index efba1cf777e..1644ceca0ef 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -144,6 +144,46 @@ class TestCephAdm(object): _cephadm.command_remove_file(ctx) assert cephadm_fs.exists(link) + def test_command_sysctl_dir_list(self, cephadm_fs, capsys): + from cephadmlib.constants import SYSCTL_DIR + cephadm_fs.create_dir(SYSCTL_DIR) + cephadm_fs.create_file(os.path.join(SYSCTL_DIR, 'c.conf')) + cephadm_fs.create_file(os.path.join(SYSCTL_DIR, 'a.conf')) + with with_cephadm_ctx( + ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--list'] + ) as ctx: + assert _cephadm.command_sysctl_dir(ctx) == 0 + assert capsys.readouterr().out.splitlines() == ['a.conf', 'c.conf'] + + def test_command_sysctl_dir_list_missing_dir(self, cephadm_fs): + import shutil + from cephadmlib.constants import SYSCTL_DIR + # cephadm_fs already has /etc (e.g. from UNIT_DIR). Only sysctl.d must be absent. + if cephadm_fs.exists(SYSCTL_DIR): + shutil.rmtree(SYSCTL_DIR) + assert not cephadm_fs.exists(SYSCTL_DIR) + with with_cephadm_ctx( + ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--list'] + ) as ctx: + with pytest.raises(_cephadm.Error, match='Not a directory'): + _cephadm.command_sysctl_dir(ctx) + + def test_command_sysctl_dir_apply_system(self, cephadm_fs): + with with_cephadm_ctx( + ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--apply-system'] + ) as ctx: + assert _cephadm.command_sysctl_dir(ctx) == 0 + + def test_command_sysctl_dir_apply_system_failure(self, cephadm_fs): + # Do not let with_cephadm_ctx re-patch cephadm.call back to success (exit 0). + with mock.patch('cephadm.call', return_value=('out', 'sysctl failed', 1)): + with with_cephadm_ctx( + ['sysctl-dir', '--fsid', '00000000-0000-0000-0000-0000deadbeef', '--apply-system'], + mock_cephadm_call_fn=False, + ) as ctx: + with pytest.raises(_cephadm.Error, match='sysctl --system failed'): + _cephadm.command_sysctl_dir(ctx) + @mock.patch('cephadm.socket.socket.bind') @mock.patch('cephadm.logger') def test_port_in_use_special_cases(self, _logger, _bind): diff --git a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py index 9db971f6f21..b1660d9e957 100644 --- a/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py +++ b/src/pybind/mgr/cephadm/tests/test_tuned_profiles.py @@ -1,11 +1,14 @@ +import asyncio import pytest import json +from contextlib import contextmanager from tests import mock from cephadm.tuned_profiles import TunedProfileUtils, SYSCTL_DIR +from cephadm.utils import cephadmNoImage from cephadm.inventory import TunedProfileStore from ceph.utils import datetime_now from ceph.deployment.service_spec import TunedProfileSpec, PlacementSpec -from cephadm.ssh import SSHManager, RemoteCommand, Executables +from cephadm.ssh import SSHManager from orchestrator import HostSpec from typing import List, Dict @@ -67,6 +70,13 @@ class FakeMgr: self.offline_hosts = [] self.log_refresh_metadata = False + @contextmanager + def async_timeout_handler(self, _host, _msg): + yield + + def wait_async(self, coro, timeout=None): + return asyncio.run(coro) + def set_store(self, what: str, value: str): raise SaveError(f'{what}: {value}') @@ -128,57 +138,48 @@ class TestTunedProfiles: ] _write_profiles.assert_has_calls(calls, any_order=True) - @mock.patch('cephadm.ssh.SSHManager.check_execute_command') - def test_rm_stray_tuned_profiles(self, _check_execute_command): + @mock.patch('cephadm.tuned_profiles.CephadmServe._run_cephadm', new_callable=mock.AsyncMock) + @mock.patch('cephadm.tuned_profiles.TunedProfileUtils._sysctl_dir_apply_system') + @mock.patch('cephadm.tuned_profiles.TunedProfileUtils._sysctl_dir_list') + def test_rm_stray_tuned_profiles( + self, _sysctl_dir_list, _sysctl_dir_apply_system, _run_cephadm): profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} # for this test, going to use host "a" and put 4 cephadm generated # profiles "p1" "p2", "p3" and "who" only two of which should be there ("p1", "p2") # as well as a file not generated by cephadm. Only the "p3" and "who" - # profiles should be removed from the host. This should total to 4 - # calls to check_execute_command, 1 "ls", 2 "rm", and 1 "sysctl --system" - _check_execute_command.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf', - 'p2-cephadm-tuned-profile.conf', - 'p3-cephadm-tuned-profile.conf', - 'who-cephadm-tuned-profile.conf', - 'dont-touch-me']) + # profiles should be removed via cephadm remove-file. List/apply use cephadm. + _sysctl_dir_list.return_value = '\n'.join(['p1-cephadm-tuned-profile.conf', + 'p2-cephadm-tuned-profile.conf', + 'p3-cephadm-tuned-profile.conf', + 'who-cephadm-tuned-profile.conf', + 'dont-touch-me']) + _run_cephadm.return_value = ([''], [''], 0) mgr = FakeMgr(['a', 'b', 'c'], ['a', 'b', 'c'], [], profiles) tp = TunedProfileUtils(mgr) tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])) - calls = [ - mock.call( - 'a', RemoteCommand(Executables.LS, [SYSCTL_DIR]), log_command=False - ), - mock.call( - 'a', - RemoteCommand( - Executables.RM, - ['-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf'] - ) - ), + _sysctl_dir_list.assert_called_once_with('a') + _sysctl_dir_apply_system.assert_called_once_with('a') + rm_calls = [ mock.call( - 'a', - RemoteCommand( - Executables.RM, - ['-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf'] - ) - ), + 'a', cephadmNoImage, 'remove-file', + ['--path', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']), mock.call( - 'a', RemoteCommand(Executables.SYSCTL, ['--system']) - ), + 'a', cephadmNoImage, 'remove-file', + ['--path', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']), ] - _check_execute_command.assert_has_calls(calls, any_order=True) + _run_cephadm.assert_has_calls(rm_calls, any_order=True) + assert _run_cephadm.call_count == 2 - @mock.patch('cephadm.ssh.SSHManager.check_execute_command') + @mock.patch('cephadm.tuned_profiles.TunedProfileUtils._sysctl_dir_apply_system') @mock.patch('cephadm.ssh.SSHManager.write_remote_file') - def test_write_tuned_profiles(self, _write_remote_file, _check_execute_command): + def test_write_tuned_profiles(self, _write_remote_file, _sysctl_dir_apply_system): profiles = {'p1': self.tspec1, 'p2': self.tspec2, 'p3': self.tspec3} # for this test we will use host "a" and have it so host_needs_tuned_profile_update # returns True for p2 and False for p1 (see FakeCache class). So we should see - # 2 ssh calls, one to write p2, one to run sysctl --system - _check_execute_command.return_value = 'success' + # one write for p2 and sysctl-dir --apply-system via cephadm. _write_remote_file.return_value = 'success' mgr = FakeMgr(['a', 'b', 'c'], ['a', 'b', 'c'], @@ -186,9 +187,7 @@ class TestTunedProfiles: profiles) tp = TunedProfileUtils(mgr) tp._write_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2])) - _check_execute_command.assert_called_with( - 'a', RemoteCommand(Executables.SYSCTL, ['--system']) - ) + _sysctl_dir_apply_system.assert_called_once_with('a') _write_remote_file.assert_called_with( 'a', f'{SYSCTL_DIR}/p2-cephadm-tuned-profile.conf', tp._profile_to_str(self.tspec2).encode('utf-8')) diff --git a/src/pybind/mgr/cephadm/tuned_profiles.py b/src/pybind/mgr/cephadm/tuned_profiles.py index 7a37d937904..d3f07342beb 100644 --- a/src/pybind/mgr/cephadm/tuned_profiles.py +++ b/src/pybind/mgr/cephadm/tuned_profiles.py @@ -3,7 +3,8 @@ from typing import Dict, List, TYPE_CHECKING from ceph.utils import datetime_now from .schedule import HostAssignment from ceph.deployment.service_spec import ServiceSpec, TunedProfileSpec -from . import ssh +from .serve import CephadmServe +from .utils import cephadmNoImage if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -12,13 +13,33 @@ logger = logging.getLogger(__name__) SYSCTL_DIR = '/etc/sysctl.d' -SYSCTL_SYSTEM_CMD = ssh.RemoteCommand(ssh.Executables.SYSCTL, ['--system']) +SYSCTL_DIR_CEPHADM_CMD = 'sysctl-dir' class TunedProfileUtils(): def __init__(self, mgr: "CephadmOrchestrator") -> None: self.mgr = mgr + def _sysctl_dir_list(self, host: str) -> str: + with self.mgr.async_timeout_handler(host, 'cephadm sysctl-dir --list'): + out, _err, _code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( + host, + cephadmNoImage, + SYSCTL_DIR_CEPHADM_CMD, + ['--list'], + log_output=self.mgr.log_refresh_metadata, + )) + return ''.join(out) + + def _sysctl_dir_apply_system(self, host: str) -> None: + with self.mgr.async_timeout_handler(host, 'cephadm sysctl-dir --apply-system'): + self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( + host, + cephadmNoImage, + SYSCTL_DIR_CEPHADM_CMD, + ['--apply-system'], + )) + def _profile_to_str(self, p: TunedProfileSpec) -> str: p_str = f'# created by cephadm\n# tuned profile "{p.profile_name}"\n\n' for k, v in p.settings.items(): @@ -72,8 +93,7 @@ class TunedProfileUtils(): """ if self.mgr.cache.is_host_unreachable(host): return - cmd = ssh.RemoteCommand(ssh.Executables.LS, [SYSCTL_DIR]) - found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n') + found_files = self._sysctl_dir_list(host).split('\n') found_files = [s.strip() for s in found_files] profile_names: List[str] = sum([[*p] for p in profiles], []) # extract all profiles names profile_names = list(set(profile_names)) # remove duplicates @@ -84,11 +104,13 @@ class TunedProfileUtils(): continue if file not in expected_files: logger.info(f'Removing stray tuned profile file {file}') - cmd = ssh.RemoteCommand(ssh.Executables.RM, ['-f', f'{SYSCTL_DIR}/{file}']) - self.mgr.ssh.check_execute_command(host, cmd) + path = f'{SYSCTL_DIR}/{file}' + with self.mgr.async_timeout_handler(host, f'cephadm remove-file ({path})'): + self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( + host, cephadmNoImage, 'remove-file', ['--path', path])) updated = True if updated: - self.mgr.ssh.check_execute_command(host, SYSCTL_SYSTEM_CMD) + self._sysctl_dir_apply_system(host) def _write_tuned_profiles(self, host: str, profiles: List[Dict[str, str]]) -> None: if self.mgr.cache.is_host_unreachable(host): @@ -102,5 +124,5 @@ class TunedProfileUtils(): self.mgr.ssh.write_remote_file(host, profile_filename, content.encode('utf-8')) updated = True if updated: - self.mgr.ssh.check_execute_command(host, SYSCTL_SYSTEM_CMD) + self._sysctl_dir_apply_system(host) self.mgr.cache.last_tuned_profile_update[host] = datetime_now()