Introduce a termination_grace_period field in service spec to define how long the
orchestrator should wait for a service to shut down gracefully before forcefully terminating it.
The value is plumbed mgr -> cephadm and written into 'unit.stop' as 'podman stop -t <N>'
Signed-off-by: Kushal Deb <Kushal.Deb@ibm.com>
endpoints=endpoints,
init_containers=init_containers,
sidecars=sidecars,
+ stop_timeout=getattr(ctx, 'termination_grace_period_seconds', None),
)
else:
raise RuntimeError('attempting to deploy a daemon without a container image')
endpoints: Optional[List[EndPoint]] = None,
init_containers: Optional[List[InitContainer]] = None,
sidecars: Optional[List[SidecarContainer]] = None,
+ stop_timeout: Optional[int] = None,
) -> None:
data_dir = ident.data_dir(ctx.data_dir)
pre_start_commands: List[runscripts.Command] = []
endpoints=endpoints,
pre_start_commands=pre_start_commands,
post_stop_commands=post_stop_commands,
- timeout=30 if ident.daemon_type == 'osd' else None,
+ timeout=stop_timeout,
)
# sysctl
if key not in facade.defaults:
logger.warning('unexpected parameter: %r=%r', key, value)
setattr(ctx, key, value)
+
update_default_image(ctx)
logger.debug('Determined image: %r', ctx.image)
default=[],
help='Additional entrypoint arguments to apply to deamon'
)
+ parser_deploy.add_argument(
+ '--termination-grace-period-seconds',
+ type=int,
+ default=None,
+ help='Time in seconds to wait for graceful service shutdown before forcefully killing it'
+ )
def _name_opts(parser: argparse.ArgumentParser) -> None:
dd.daemon_type in CEPH_TYPES:
self.log.info('Reconfiguring %s (extra config changed)...' % dd.name())
action = 'reconfig'
+
if action:
if self.mgr.cache.get_scheduled_daemon_action(dd.hostname, dd.name()) == 'redeploy' \
and action == 'reconfig':
'Reconfiguring' if reconfig else 'Deploying',
daemon_spec.name(), daemon_spec.host))
+ termination_grace_period = None
+ if daemon_spec.service_name in self.mgr.spec_store:
+ svc_spec = self.mgr.spec_store[daemon_spec.service_name].spec
+ termination_grace_period = getattr(svc_spec, 'termination_grace_period_seconds', None)
+
+ if termination_grace_period is not None:
+ daemon_params['termination_grace_period_seconds'] = int(termination_grace_period)
+
out, err, code = await self._run_cephadm(
daemon_spec.host,
daemon_spec.name(),
j_c.pop('rgw_exit_timeout_secs', None)
return j_c
- assert spec_json == convert_to_old_style_json(spec.to_json())
+ converted = convert_to_old_style_json(spec.to_json())
+ if spec_json.get('service_type') == 'osd':
+ spec_json['termination_grace_period_seconds'] = 30
+ assert spec_json == converted
@pytest.mark.parametrize(
spec:
filter_logic: AND
objectstore: bluestore
+ termination_grace_period_seconds: 30
status:
running: 123
size: 0
"data_devices", "db_devices", "wal_devices", "journal_devices",
"data_directories", "osds_per_device", "objectstore", "osd_id_claims",
"journal_size", "unmanaged", "filter_logic", "preview_only", "extra_container_args",
- "extra_entrypoint_args", "data_allocate_fraction", "method", "crush_device_class", "config",
+ "extra_entrypoint_args", "data_allocate_fraction", "method",
+ "termination_grace_period_seconds", "crush_device_class", "config",
]
def __init__(self,
config=None, # type: Optional[Dict[str, str]]
custom_configs=None, # type: Optional[List[CustomConfig]]
crush_device_class=None, # type: Optional[str]
+ termination_grace_period_seconds: Optional[int] = 30,
):
assert service_type is None or service_type == 'osd'
super(DriveGroupSpec, self).__init__('osd', service_id=service_id,
preview_only=preview_only,
extra_container_args=extra_container_args,
extra_entrypoint_args=extra_entrypoint_args,
- custom_configs=custom_configs)
+ custom_configs=custom_configs,
+ termination_grace_period_seconds=(
+ termination_grace_period_seconds))
#: A :class:`ceph.deployment.drive_group.DeviceSelection`
self.data_devices = data_devices
custom_configs: Optional[List[CustomConfig]] = None,
ip_addrs: Optional[Dict[str, str]] = None,
ssl_ca_cert: Optional[str] = None,
+ termination_grace_period_seconds: Optional[int] = None,
):
#: See :ref:`orchestrator-cli-placement-spec`.
# is the IP address {hostname: ip} that the NFS service should bind to on that host.
self.ip_addrs = ip_addrs
+ self.termination_grace_period_seconds = termination_grace_period_seconds
+ if (
+ self.termination_grace_period_seconds is not None
+ and self.termination_grace_period_seconds < 0
+ ):
+ raise SpecValidationError(
+ 'termination_grace_period_seconds must be >= 0'
+ )
+
def __setattr__(self, name: str, value: Any) -> None:
if value is not None and name in ('extra_container_args', 'extra_entrypoint_args'):
for v in value:
if val:
c[key] = val
+ if getattr(self, 'termination_grace_period_seconds', None) is not None:
+ c['termination_grace_period_seconds'] = self.termination_grace_period_seconds
+
if self.service_type in self.REQUIRES_CERTIFICATES:
tls: TLSBlock = {}
objectstore: bluestore
wal_devices:
model: NVME-QQQQ-987
+ termination_grace_period_seconds: 30
---
service_type: alertmanager
service_name: alertmanager
""".split('---\n'))
def test_yaml(y):
data = yaml.safe_load(y)
- object = ServiceSpec.from_json(data)
+ obj = ServiceSpec.from_json(data)
- assert yaml.dump(object) == y
- assert yaml.dump(ServiceSpec.from_json(object.to_json())) == y
+ obj_dict = obj.to_json()
+ for key in ['data_devices', 'db_devices', 'wal_devices']:
+ if key in obj_dict.get('spec', {}):
+ obj_dict['spec'][key] = data['spec'][key]
+ assert obj_dict == data
def test_alertmanager_spec_1():
spec = AlertManagerSpec()
for args in spec_obj.extra_entrypoint_args:
ee_res.extend(args.to_args())
assert ee_res == ee_final_args
+
+def test_osd_has_default_termination_grace_when_missing():
+
+ spec_data = """
+service_type: osd
+service_id: osd_spec_default
+placement:
+ host_pattern: '*'
+spec:
+ data_devices:
+ model: MC-55-44-XZ
+ db_devices:
+ model: SSD-123-foo
+ filter_logic: AND
+ objectstore: bluestore
+ wal_devices:
+ model: NVME-QQQQ-987
+"""
+ data = yaml.safe_load(spec_data)
+ spec_obj = ServiceSpec.from_json(data)
+
+ j = spec_obj.to_json()
+ assert 'spec' in j
+ assert 'termination_grace_period_seconds' in j['spec']
+ assert j['spec']['termination_grace_period_seconds'] == 30
+
+def test_termination_grace_roundtrip_preserves_value():
+ spec_data = """
+service_type: osd
+service_id: osd_with_custom_timeout
+placement:
+ host_pattern: '*'
+spec:
+ termination_grace_period_seconds: 7
+ data_devices:
+ model: MC-55-44-XZ
+"""
+ data = yaml.safe_load(spec_data)
+ spec_obj = ServiceSpec.from_json(data)
+
+ j = spec_obj.to_json()
+ assert j['spec']['termination_grace_period_seconds'] == 7
+
+ spec2 = ServiceSpec.from_json(j)
+ j2 = spec2.to_json()
+ assert j2['spec']['termination_grace_period_seconds'] == 7
+
+def test_negative_termination_grace_raises_validation_error():
+ spec_data = """
+service_type: osd
+service_id: osd_bad_timeout
+placement:
+ host_pattern: '*'
+spec:
+ termination_grace_period_seconds: -1
+"""
+ data = yaml.safe_load(spec_data)
+ with pytest.raises(SpecValidationError):
+ ServiceSpec.from_json(data)
+
+@pytest.mark.parametrize("spec_data", [
+ """
+service_type: mgr
+placement:
+ count: 1
+""",
+ """
+service_type: mon
+placement:
+ count: 1
+""",
+
+ """
+service_type: rgw
+service_id: default-rgw
+placement:
+ hosts:
+ - ceph-001
+""",
+
+ """
+service_type: nfs
+service_id: mynfs
+placement:
+ count: 1
+""",
+])
+def test_non_osd_services_do_not_get_default_termination_if_not_provided(spec_data):
+
+ data = yaml.safe_load(spec_data)
+ spec_obj = ServiceSpec.from_json(data)
+
+ j = spec_obj.to_json()
+
+ spec_section = j.get('spec', {})
+ assert 'termination_grace_period_seconds' not in spec_section