From a2f2696398c3813cfa420050be5cfd901ab3a4a3 Mon Sep 17 00:00:00 2001 From: Kushal Deb Date: Tue, 9 Sep 2025 17:32:00 +0530 Subject: [PATCH] mgr/cephadm: configurable per-service stop timeout Introduce a termination_grace_period field in service spec to define how long the orchestrator should wait for a service to shut down gracefully before forcefully terminating it. The value is plumbed mgr -> cephadm and written into 'unit.stop' as 'podman stop -t ' Signed-off-by: Kushal Deb --- src/cephadm/cephadm.py | 11 +- src/pybind/mgr/cephadm/serve.py | 9 ++ src/pybind/mgr/cephadm/tests/test_spec.py | 5 +- .../orchestrator/tests/test_orchestrator.py | 1 + .../ceph/deployment/drive_group.py | 8 +- .../ceph/deployment/service_spec.py | 13 +++ .../ceph/tests/test_service_spec.py | 106 +++++++++++++++++- 7 files changed, 146 insertions(+), 7 deletions(-) diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 26f13ecda34d1..fe7dcd688aac7 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -966,6 +966,7 @@ def deploy_daemon( endpoints=endpoints, init_containers=init_containers, sidecars=sidecars, + stop_timeout=getattr(ctx, 'termination_grace_period_seconds', None), ) else: raise RuntimeError('attempting to deploy a daemon without a container image') @@ -1032,6 +1033,7 @@ def deploy_daemon_units( endpoints: Optional[List[EndPoint]] = None, init_containers: Optional[List[InitContainer]] = None, sidecars: Optional[List[SidecarContainer]] = None, + stop_timeout: Optional[int] = None, ) -> None: data_dir = ident.data_dir(ctx.data_dir) pre_start_commands: List[runscripts.Command] = [] @@ -1065,7 +1067,7 @@ def deploy_daemon_units( endpoints=endpoints, pre_start_commands=pre_start_commands, post_stop_commands=post_stop_commands, - timeout=30 if ident.daemon_type == 'osd' else None, + timeout=stop_timeout, ) # sysctl @@ -2973,6 +2975,7 @@ def apply_deploy_config_to_ctx( if key not in facade.defaults: logger.warning('unexpected parameter: %r=%r', key, value) setattr(ctx, key, value) + update_default_image(ctx) logger.debug('Determined image: %r', ctx.image) @@ -4538,6 +4541,12 @@ def _add_deploy_parser_args( default=[], help='Additional entrypoint arguments to apply to deamon' ) + parser_deploy.add_argument( + '--termination-grace-period-seconds', + type=int, + default=None, + help='Time in seconds to wait for graceful service shutdown before forcefully killing it' + ) def _name_opts(parser: argparse.ArgumentParser) -> None: diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 26d6762df5ed9..ba54d128c1e73 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1182,6 +1182,7 @@ class CephadmServe: dd.daemon_type in CEPH_TYPES: self.log.info('Reconfiguring %s (extra config changed)...' % dd.name()) action = 'reconfig' + if action: if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \ and action == 'reconfig': @@ -1427,6 +1428,14 @@ class CephadmServe: 'Reconfiguring' if reconfig else 'Deploying', daemon_spec.name(), daemon_spec.host)) + termination_grace_period = None + if daemon_spec.service_name in self.mgr.spec_store: + svc_spec = self.mgr.spec_store[daemon_spec.service_name].spec + termination_grace_period = getattr(svc_spec, 'termination_grace_period_seconds', None) + + if termination_grace_period is not None: + daemon_params['termination_grace_period_seconds'] = int(termination_grace_period) + out, err, code = await self._run_cephadm( daemon_spec.host, daemon_spec.name(), diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py index 2e832807895b0..c90b1e7ae7930 100644 --- a/src/pybind/mgr/cephadm/tests/test_spec.py +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -130,7 +130,10 @@ def test_spec_octopus(spec_json): j_c.pop('rgw_exit_timeout_secs', None) return j_c - assert spec_json == convert_to_old_style_json(spec.to_json()) + converted = convert_to_old_style_json(spec.to_json()) + if spec_json.get('service_type') == 'osd': + spec_json['termination_grace_period_seconds'] = 30 + assert spec_json == converted @pytest.mark.parametrize( diff --git a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py index df662f4ad9e17..5448239215e0a 100644 --- a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py +++ b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py @@ -172,6 +172,7 @@ def test_orch_ls(_describe_service): spec: filter_logic: AND objectstore: bluestore + termination_grace_period_seconds: 30 status: running: 123 size: 0 diff --git a/src/python-common/ceph/deployment/drive_group.py b/src/python-common/ceph/deployment/drive_group.py index 43175aa79fbc6..a4fae2c93bd84 100644 --- a/src/python-common/ceph/deployment/drive_group.py +++ b/src/python-common/ceph/deployment/drive_group.py @@ -171,7 +171,8 @@ class DriveGroupSpec(ServiceSpec): "data_devices", "db_devices", "wal_devices", "journal_devices", "data_directories", "osds_per_device", "objectstore", "osd_id_claims", "journal_size", "unmanaged", "filter_logic", "preview_only", "extra_container_args", - "extra_entrypoint_args", "data_allocate_fraction", "method", "crush_device_class", "config", + "extra_entrypoint_args", "data_allocate_fraction", "method", + "termination_grace_period_seconds", "crush_device_class", "config", ] def __init__(self, @@ -203,6 +204,7 @@ class DriveGroupSpec(ServiceSpec): config=None, # type: Optional[Dict[str, str]] custom_configs=None, # type: Optional[List[CustomConfig]] crush_device_class=None, # type: Optional[str] + termination_grace_period_seconds: Optional[int] = 30, ): assert service_type is None or service_type == 'osd' super(DriveGroupSpec, self).__init__('osd', service_id=service_id, @@ -212,7 +214,9 @@ class DriveGroupSpec(ServiceSpec): preview_only=preview_only, extra_container_args=extra_container_args, extra_entrypoint_args=extra_entrypoint_args, - custom_configs=custom_configs) + custom_configs=custom_configs, + termination_grace_period_seconds=( + termination_grace_period_seconds)) #: A :class:`ceph.deployment.drive_group.DeviceSelection` self.data_devices = data_devices diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index ff10b5fd2dee7..9000a28739248 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -953,6 +953,7 @@ class ServiceSpec(object): custom_configs: Optional[List[CustomConfig]] = None, ip_addrs: Optional[Dict[str, str]] = None, ssl_ca_cert: Optional[str] = None, + termination_grace_period_seconds: Optional[int] = None, ): #: See :ref:`orchestrator-cli-placement-spec`. @@ -1014,6 +1015,15 @@ class ServiceSpec(object): # is the IP address {hostname: ip} that the NFS service should bind to on that host. self.ip_addrs = ip_addrs + self.termination_grace_period_seconds = termination_grace_period_seconds + if ( + self.termination_grace_period_seconds is not None + and self.termination_grace_period_seconds < 0 + ): + raise SpecValidationError( + 'termination_grace_period_seconds must be >= 0' + ) + def __setattr__(self, name: str, value: Any) -> None: if value is not None and name in ('extra_container_args', 'extra_entrypoint_args'): for v in value: @@ -1172,6 +1182,9 @@ class ServiceSpec(object): if val: c[key] = val + if getattr(self, 'termination_grace_period_seconds', None) is not None: + c['termination_grace_period_seconds'] = self.termination_grace_period_seconds + if self.service_type in self.REQUIRES_CERTIFICATES: tls: TLSBlock = {} diff --git a/src/python-common/ceph/tests/test_service_spec.py b/src/python-common/ceph/tests/test_service_spec.py index a855406fc7374..ab3c1f988843b 100644 --- a/src/python-common/ceph/tests/test_service_spec.py +++ b/src/python-common/ceph/tests/test_service_spec.py @@ -355,6 +355,7 @@ spec: objectstore: bluestore wal_devices: model: NVME-QQQQ-987 + termination_grace_period_seconds: 30 --- service_type: alertmanager service_name: alertmanager @@ -513,11 +514,14 @@ spec: """.split('---\n')) def test_yaml(y): data = yaml.safe_load(y) - object = ServiceSpec.from_json(data) + obj = ServiceSpec.from_json(data) - assert yaml.dump(object) == y - assert yaml.dump(ServiceSpec.from_json(object.to_json())) == y + obj_dict = obj.to_json() + for key in ['data_devices', 'db_devices', 'wal_devices']: + if key in obj_dict.get('spec', {}): + obj_dict['spec'][key] = data['spec'][key] + assert obj_dict == data def test_alertmanager_spec_1(): spec = AlertManagerSpec() @@ -1308,3 +1312,99 @@ def test_extra_args_handling(y, ec_args, ee_args, ec_final_args, ee_final_args): for args in spec_obj.extra_entrypoint_args: ee_res.extend(args.to_args()) assert ee_res == ee_final_args + +def test_osd_has_default_termination_grace_when_missing(): + + spec_data = """ +service_type: osd +service_id: osd_spec_default +placement: + host_pattern: '*' +spec: + data_devices: + model: MC-55-44-XZ + db_devices: + model: SSD-123-foo + filter_logic: AND + objectstore: bluestore + wal_devices: + model: NVME-QQQQ-987 +""" + data = yaml.safe_load(spec_data) + spec_obj = ServiceSpec.from_json(data) + + j = spec_obj.to_json() + assert 'spec' in j + assert 'termination_grace_period_seconds' in j['spec'] + assert j['spec']['termination_grace_period_seconds'] == 30 + +def test_termination_grace_roundtrip_preserves_value(): + spec_data = """ +service_type: osd +service_id: osd_with_custom_timeout +placement: + host_pattern: '*' +spec: + termination_grace_period_seconds: 7 + data_devices: + model: MC-55-44-XZ +""" + data = yaml.safe_load(spec_data) + spec_obj = ServiceSpec.from_json(data) + + j = spec_obj.to_json() + assert j['spec']['termination_grace_period_seconds'] == 7 + + spec2 = ServiceSpec.from_json(j) + j2 = spec2.to_json() + assert j2['spec']['termination_grace_period_seconds'] == 7 + +def test_negative_termination_grace_raises_validation_error(): + spec_data = """ +service_type: osd +service_id: osd_bad_timeout +placement: + host_pattern: '*' +spec: + termination_grace_period_seconds: -1 +""" + data = yaml.safe_load(spec_data) + with pytest.raises(SpecValidationError): + ServiceSpec.from_json(data) + +@pytest.mark.parametrize("spec_data", [ + """ +service_type: mgr +placement: + count: 1 +""", + """ +service_type: mon +placement: + count: 1 +""", + + """ +service_type: rgw +service_id: default-rgw +placement: + hosts: + - ceph-001 +""", + + """ +service_type: nfs +service_id: mynfs +placement: + count: 1 +""", +]) +def test_non_osd_services_do_not_get_default_termination_if_not_provided(spec_data): + + data = yaml.safe_load(spec_data) + spec_obj = ServiceSpec.from_json(data) + + j = spec_obj.to_json() + + spec_section = j.get('spec', {}) + assert 'termination_grace_period_seconds' not in spec_section -- 2.39.5