From a08c0fbc185b668ffd60b8dece665d3b83942b13 Mon Sep 17 00:00:00 2001 From: Adam King Date: Mon, 20 Dec 2021 01:39:08 -0500 Subject: [PATCH] mgr/cephadm: allow miscellaneous container args at service level Fixes: https://tracker.ceph.com/issues/51566 Signed-off-by: Adam King Conflicts: src/pybind/mgr/cephadm/module.py src/pybind/mgr/cephadm/tests/test_services.py --- doc/cephadm/services/index.rst | 26 ++++++++++++ src/cephadm/cephadm | 41 ++++++++++++++----- src/pybind/mgr/cephadm/serve.py | 19 ++++++++- .../mgr/cephadm/services/cephadmservice.py | 12 +++++- src/pybind/mgr/cephadm/tests/test_cephadm.py | 19 ++++++++- src/pybind/mgr/cephadm/tests/test_services.py | 16 ++++---- src/pybind/mgr/orchestrator/_interface.py | 3 ++ .../ceph/deployment/service_spec.py | 5 +++ 8 files changed, 119 insertions(+), 22 deletions(-) diff --git a/doc/cephadm/services/index.rst b/doc/cephadm/services/index.rst index 3f08bf583bfce..47972b49e6a6d 100644 --- a/doc/cephadm/services/index.rst +++ b/doc/cephadm/services/index.rst @@ -455,6 +455,32 @@ candidate hosts. If there are fewer hosts selected by the placement specification than demanded by ``count``, cephadm will deploy only on the selected hosts. +Extra Container Arguments +========================= + +.. warning:: + The arguments provided for extra container args are limited to whatever arguments are available for a `run` command from whichever container engine you are using. Providing any arguments the `run` command does not support (or invalid values for arguments) will cause the daemon to fail to start. + + +Cephadm supports providing extra miscellaneous container arguments for +specific cases when they may be necessary. For example, if a user needed +to limit the amount of cpus their mon daemons make use of they could apply +a spec like + +.. code-block:: yaml + + service_type: mon + service_name: mon + placement: + hosts: + - host1 + - host2 + - host3 + extra_container_args: + - "--cpus=2" + +which would cause each mon daemon to be deployed with `--cpus=2`. + .. _orch-rm: Removing a Service diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 4889ec5e1eb9d..8b5fdd917aea3 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -4718,6 +4718,19 @@ def extract_uid_gid_monitoring(ctx, daemon_type): return uid, gid +def get_container_with_extra_args(ctx: CephadmContext, + fsid: str, daemon_type: str, daemon_id: Union[int, str], + privileged: bool = False, + ptrace: bool = False, + container_args: Optional[List[str]] = None) -> 'CephContainer': + # wrapper for get_container that additionally adds extra_container_args if present + # used for deploying daemons with additional podman/docker container arguments + c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args) + if 'extra_container_args' in ctx and ctx.extra_container_args: + c.container_args.extend(ctx.extra_container_args) + return c + + @default_image def command_deploy(ctx): # type: (CephadmContext) -> None @@ -4756,8 +4769,8 @@ def command_deploy(ctx): uid, gid = extract_uid_gid(ctx) make_var_run(ctx, ctx.fsid, uid, gid) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id, - ptrace=ctx.allow_ptrace) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id, + ptrace=ctx.allow_ptrace) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, osd_fsid=ctx.osd_fsid, @@ -4781,7 +4794,7 @@ def command_deploy(ctx): 'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args))) uid, gid = extract_uid_gid_monitoring(ctx, daemon_type) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, reconfig=ctx.reconfig, ports=daemon_ports) @@ -4793,7 +4806,7 @@ def command_deploy(ctx): config, keyring = get_config_and_keyring(ctx) # TODO: extract ganesha uid/gid (997, 994) ? uid, gid = extract_uid_gid(ctx) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, reconfig=ctx.reconfig, @@ -4802,7 +4815,7 @@ def command_deploy(ctx): elif daemon_type == CephIscsi.daemon_type: config, keyring = get_config_and_keyring(ctx) uid, gid = extract_uid_gid(ctx) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, reconfig=ctx.reconfig, @@ -4811,7 +4824,7 @@ def command_deploy(ctx): elif daemon_type == HAproxy.daemon_type: haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id) uid, gid = haproxy.extract_uid_gid_haproxy() - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, reconfig=ctx.reconfig, ports=daemon_ports) @@ -4819,7 +4832,7 @@ def command_deploy(ctx): elif daemon_type == Keepalived.daemon_type: keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id) uid, gid = keepalived.extract_uid_gid_keepalived() - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid, reconfig=ctx.reconfig, ports=daemon_ports) @@ -4828,9 +4841,9 @@ def command_deploy(ctx): cc = CustomContainer.init(ctx, ctx.fsid, daemon_id) if not ctx.reconfig and not redeploy: daemon_ports.extend(cc.ports) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id, - privileged=cc.privileged, - ptrace=ctx.allow_ptrace) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id, + privileged=cc.privileged, + ptrace=ctx.allow_ptrace) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid=cc.uid, gid=cc.gid, config=None, keyring=None, reconfig=ctx.reconfig, @@ -4852,7 +4865,7 @@ def command_deploy(ctx): elif daemon_type == SNMPGateway.daemon_type: sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id) - c = get_container(ctx, ctx.fsid, daemon_type, daemon_id) + c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id) deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, sc.uid, sc.gid, ports=daemon_ports) @@ -8579,6 +8592,12 @@ def _get_parser(): '--meta-json', help='JSON dict of additional metadata' ) + parser_deploy.add_argument( + '--extra-container-args', + action='append', + default=[], + help='Additional container arguments to apply to deamon' + ) parser_check_host = subparsers.add_parser( 'check-host', help='check host configuration') diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index f0b57f7e951d2..cd637ba9ed335 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -328,6 +328,7 @@ class CephadmServe: sd.rank = int(d['rank']) if d.get('rank') is not None else None sd.rank_generation = int(d['rank_generation']) if d.get( 'rank_generation') is not None else None + sd.extra_container_args = d.get('extra_container_args') if 'state' in d: sd.status_desc = d['state'] sd.status = { @@ -858,6 +859,12 @@ class CephadmServe: self.log.info('Reconfiguring %s (dependencies changed)...' % ( dd.name())) action = 'reconfig' + elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args: + self.log.debug( + f'{dd.name()} container cli args {dd.extra_container_args} -> {spec.extra_container_args}') + self.log.info(f'Redeploying {dd.name()}, (container cli args changed) . . .') + dd.extra_container_args = spec.extra_container_args + action = 'redeploy' elif self.mgr.last_monmap and \ self.mgr.last_monmap > last_config and \ dd.daemon_type in CEPH_TYPES: @@ -1073,6 +1080,14 @@ class CephadmServe: if self.mgr.allow_ptrace: daemon_spec.extra_args.append('--allow-ptrace') + try: + eca = daemon_spec.extra_container_args + if eca: + for a in eca: + daemon_spec.extra_args.append(f'--extra-container-args={a}') + except AttributeError: + eca = None + if self.mgr.cache.host_needs_registry_login(daemon_spec.host) and self.mgr.registry_url: self._registry_login(daemon_spec.host, self.mgr.registry_url, self.mgr.registry_username, self.mgr.registry_password) @@ -1092,11 +1107,13 @@ class CephadmServe: 'deployed_by': self.mgr.get_active_mgr_digests(), 'rank': daemon_spec.rank, 'rank_generation': daemon_spec.rank_generation, + 'extra_container_args': eca }), '--config-json', '-', ] + daemon_spec.extra_args, stdin=json.dumps(daemon_spec.final_config), - image=image) + image=image, + ) # refresh daemon state? (ceph daemon reconfig does not need it) if not reconfig or daemon_spec.daemon_type not in CEPH_TYPES: diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 65dd7b0f7027b..72f1b0c40f002 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -37,7 +37,8 @@ class CephadmDaemonDeploySpec: ip: Optional[str] = None, ports: Optional[List[int]] = None, rank: Optional[int] = None, - rank_generation: Optional[int] = None): + rank_generation: Optional[int] = None, + extra_container_args: Optional[List[str]] = None): """ A data struction to encapsulate `cephadm deploy ... """ @@ -72,6 +73,8 @@ class CephadmDaemonDeploySpec: self.rank: Optional[int] = rank self.rank_generation: Optional[int] = rank_generation + self.extra_container_args = extra_container_args + def name(self) -> str: return '%s.%s' % (self.daemon_type, self.daemon_id) @@ -96,6 +99,7 @@ class CephadmDaemonDeploySpec: ports=dd.ports, rank=dd.rank, rank_generation=dd.rank_generation, + extra_container_args=dd.extra_container_args, ) def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription: @@ -110,6 +114,7 @@ class CephadmDaemonDeploySpec: ports=self.ports, rank=self.rank, rank_generation=self.rank_generation, + extra_container_args=self.extra_container_args, ) @@ -171,6 +176,10 @@ class CephadmService(metaclass=ABCMeta): rank: Optional[int] = None, rank_generation: Optional[int] = None, ) -> CephadmDaemonDeploySpec: + try: + eca = spec.extra_container_args + except AttributeError: + eca = None return CephadmDaemonDeploySpec( host=host, daemon_id=daemon_id, @@ -181,6 +190,7 @@ class CephadmService(metaclass=ABCMeta): ip=ip, rank=rank, rank_generation=rank_generation, + extra_container_args=eca, ) def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index d6850ae818ffa..3bd67695438b0 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -435,7 +435,7 @@ class TestCephadm(object): _run_cephadm.assert_called_with( 'test', 'mon.test', 'deploy', [ '--name', 'mon.test', - '--meta-json', '{"service_name": "mon", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '--meta-json', '{"service_name": "mon", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--reconfig', ], @@ -443,6 +443,23 @@ class TestCephadm(object): + '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}', image='') + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_extra_container_args(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + with with_host(cephadm_module, 'test'): + with with_service(cephadm_module, ServiceSpec(service_type='crash', extra_container_args=['--cpus=2', '--quiet']), CephadmOrchestrator.apply_crash): + _run_cephadm.assert_called_with( + 'test', 'crash.test', 'deploy', [ + '--name', 'crash.test', + '--meta-json', '{"service_name": "crash", "ports": [], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": ["--cpus=2", "--quiet"]}', + '--config-json', '-', + '--extra-container-args=--cpus=2', + '--extra-container-args=--quiet' + ], + stdin='{"config": "", "keyring": ""}', + image='', + ) + @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator): with with_host(cephadm_module, 'test'): diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 969c06f996f32..6a0750430402d 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -274,7 +274,7 @@ class TestMonitoring: 'deploy', [ '--name', 'alertmanager.test', - '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '9093 9094' ], stdin=json.dumps({"files": {"alertmanager.yml": y}, "peers": []}), @@ -318,7 +318,7 @@ class TestMonitoring: [ '--name', 'prometheus.test', '--meta-json', - '{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '9095' ], @@ -388,7 +388,7 @@ class TestMonitoring: [ '--name', 'grafana.test', '--meta-json', - '{"service_name": "grafana", "ports": [3000], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '{"service_name": "grafana", "ports": [3000], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '3000'], stdin=json.dumps({"files": files}), image='') @@ -460,7 +460,7 @@ spec: _run_cephadm.assert_called_with( 'test', 'alertmanager.test', 'deploy', [ '--name', 'alertmanager.test', - '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '4200 9094', '--reconfig' @@ -534,7 +534,7 @@ class TestSNMPGateway: [ '--name', 'snmp-gateway.test', '--meta-json', - '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '9464' ], @@ -569,7 +569,7 @@ class TestSNMPGateway: [ '--name', 'snmp-gateway.test', '--meta-json', - '{"service_name": "snmp-gateway", "ports": [9465], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '{"service_name": "snmp-gateway", "ports": [9465], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '9465' ], @@ -608,7 +608,7 @@ class TestSNMPGateway: [ '--name', 'snmp-gateway.test', '--meta-json', - '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '9464' ], @@ -652,7 +652,7 @@ class TestSNMPGateway: [ '--name', 'snmp-gateway.test', '--meta-json', - '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '{"service_name": "snmp-gateway", "ports": [9464], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null}', '--config-json', '-', '--tcp-ports', '9464' ], diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 006b670038355..a1b1893db0cdf 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -844,6 +844,7 @@ class DaemonDescription(object): deployed_by: Optional[List[str]] = None, rank: Optional[int] = None, rank_generation: Optional[int] = None, + extra_container_args: Optional[List[str]] = None, ) -> None: #: Host is at the same granularity as InventoryHost @@ -906,6 +907,8 @@ class DaemonDescription(object): self.is_active = is_active + self.extra_container_args = extra_container_args + @property def status(self) -> Optional[DaemonDescriptionStatus]: return self._status diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 8cd264cc77b7d..1c9a97036608d 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -490,6 +490,7 @@ class ServiceSpec(object): unmanaged: bool = False, preview_only: bool = False, networks: Optional[List[str]] = None, + extra_container_args: Optional[List[str]] = None, ): #: See :ref:`orchestrator-cli-placement-spec`. @@ -528,6 +529,8 @@ class ServiceSpec(object): if config: self.config = {k.replace(' ', '_'): v for k, v in config.items()} + self.extra_container_args: Optional[List[str]] = extra_container_args + @classmethod @handle_type_error def from_json(cls: Type[ServiceSpecT], json_spec: Dict) -> ServiceSpecT: @@ -650,6 +653,8 @@ class ServiceSpec(object): ret['unmanaged'] = self.unmanaged if self.networks: ret['networks'] = self.networks + if self.extra_container_args: + ret['extra_container_args'] = self.extra_container_args c = {} for key, val in sorted(self.__dict__.items(), key=lambda tpl: tpl[0]): -- 2.39.5