From 85d2633310c3a66415358a9872732473ac34d345 Mon Sep 17 00:00:00 2001
From: Adam King
Date: Wed, 15 Jan 2025 16:42:28 -0500
Subject: [PATCH] mgr/cephadm: validate no duplicate groups and group vs. service id for nvmeof

This came as a recommendation from the nvmeof team because 1) users
shouldn't have multiple nvmeof services with the same group and 2) some
users may take an existing nvmeof service spec and modify the service
id to point to another group but forget to update the actual group
parameter. This makes it look at a glance like the two services are
using different groups when they are in fact not.

Usually I'd write a spec migration for this, but if we were to find
users had two specs with duplicate groups, or specs where the service
id doesn't match the group name, I don't think there's a clear path of
action to take to remedy the situation. Even just forcing the service
id of an nvmeof service to be pool_name.group_name could, I think,
cause a lot of confusion during an upgrade, so I'm hesitant to do it.

Signed-off-by: Adam King
---
 src/pybind/mgr/cephadm/module.py              | 32 +++++---
 src/pybind/mgr/cephadm/tests/test_services.py | 76 +++++++++++++++++--
 2 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 080a1c3bf21..1efa1725c9e 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -33,12 +33,15 @@ from prettytable import PrettyTable
 from ceph.cephadm.images import DefaultImages
 from ceph.deployment import inventory
 from ceph.deployment.drive_group import DriveGroupSpec
-from ceph.deployment.service_spec import \
-    ServiceSpec, PlacementSpec, \
-    HostPlacementSpec, IngressSpec, \
-    TunedProfileSpec, \
-    MgmtGatewaySpec, \
-    NvmeofServiceSpec
+from ceph.deployment.service_spec import (
+    ServiceSpec,
+    PlacementSpec,
+    HostPlacementSpec,
+    IngressSpec,
+    TunedProfileSpec,
+    MgmtGatewaySpec,
+    NvmeofServiceSpec,
+)
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from cephadm.serve import CephadmServe
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
@@ -3397,13 +3400,24 @@ Then run the following:
                 raise OrchestratorError("The 'oauth2-proxy' service depends on the 'mgmt-gateway' service, but it is not configured.")
 
         if spec.service_type == 'nvmeof':
-            spec = cast(NvmeofServiceSpec, spec)
-            assert spec.pool is not None, "Pool cannot be None for nvmeof services"
+            nvmeof_spec = cast(NvmeofServiceSpec, spec)
+            assert nvmeof_spec.pool is not None, "Pool cannot be None for nvmeof services"
+            assert nvmeof_spec.service_id is not None  # for mypy
             try:
-                self._check_pool_exists(spec.pool, spec.service_name())
+                self._check_pool_exists(nvmeof_spec.pool, nvmeof_spec.service_name())
             except OrchestratorError as e:
                 self.log.debug(f"{e}")
                 raise
+            nvmeof_spec = cast(NvmeofServiceSpec, spec)
+            assert nvmeof_spec.service_id is not None  # for mypy
+            if nvmeof_spec.group and not nvmeof_spec.service_id.endswith(nvmeof_spec.group):
+                raise OrchestratorError("The 'nvmeof' service id/name must end with '.<nvmeof-group-name>'. Found "
+                                        f"group name '{nvmeof_spec.group}' and service id '{nvmeof_spec.service_id}'")
+            for sspec in [s.spec for s in self.spec_store.get_by_service_type('nvmeof')]:
+                nspec = cast(NvmeofServiceSpec, sspec)
+                if nvmeof_spec.group == nspec.group:
+                    raise OrchestratorError(f"Cannot create nvmeof service with group {nvmeof_spec.group}. That group is already "
That group is already " + f"being used by the service {nspec.service_name()}") if spec.placement.count is not None: if spec.service_type in ['mon', 'mgr']: diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index ebdbcc4991d..4060f51e28b 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -339,11 +339,11 @@ class TestNVMEOFService: @patch("cephadm.module.CephadmOrchestrator.get_unique_name") def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator): - nvmeof_daemon_id = 'testpool.test.qwert' pool = 'testpool' + group = 'mygroup' + nvmeof_daemon_id = f'{pool}.{group}.test.qwert' tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288' default_port = 5500 - group = 'mygroup' _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) _get_name.return_value = nvmeof_daemon_id @@ -426,7 +426,7 @@ tgt_cmd_extra_args = {tgt_cmd_extra_args} timeout = 1.0\n""" with with_host(cephadm_module, 'test'): - with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool, + with with_service(cephadm_module, NvmeofServiceSpec(service_id=f'{pool}.{group}', tgt_cmd_extra_args=tgt_cmd_extra_args, group=group, pool=pool)): @@ -437,14 +437,14 @@ timeout = 1.0\n""" [], stdin=json.dumps({ "fsid": "fsid", - "name": "nvmeof.testpool.test.qwert", + "name": "nvmeof.testpool.mygroup.test.qwert", "image": "", "deploy_arguments": [], "params": { "tcp_ports": [5500, 4420, 8009, 10008] }, "meta": { - "service_name": "nvmeof.testpool", + "service_name": "nvmeof.testpool.mygroup", "ports": [5500, 4420, 8009, 10008], "ip": None, "deployed_by": [], @@ -455,7 +455,7 @@ timeout = 1.0\n""" }, "config_blobs": { "config": "", - "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n", + "keyring": "[client.nvmeof.testpool.mygroup.test.qwert]\nkey = None\n", "files": { "ceph-nvmeof.conf": nvmeof_gateway_conf } @@ -465,6 +465,70 @@ timeout = 1.0\n""" use_current_daemon_image=False, ) + @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) + def test_validate_no_group_duplicate_on_apply(self, cephadm_module: CephadmOrchestrator): + nvmeof_spec_group1 = NvmeofServiceSpec( + service_id='testpool.testgroup', + group='testgroup', + pool='testpool' + ) + nvmeof_spec_also_group1 = NvmeofServiceSpec( + service_id='testpool2.testgroup', + group='testgroup', + pool='testpool2' + ) + with with_host(cephadm_module, 'test'): + out = cephadm_module._apply_service_spec(nvmeof_spec_group1) + assert out == 'Scheduled nvmeof.testpool.testgroup update...' + nvmeof_specs = cephadm_module.spec_store.get_by_service_type('nvmeof') + assert len(nvmeof_specs) == 1 + assert nvmeof_specs[0].spec.service_name() == 'nvmeof.testpool.testgroup' + with pytest.raises( + OrchestratorError, + match='Cannot create nvmeof service with group testgroup. 
+                      'being used by the service nvmeof.testpool.testgroup'
+            ):
+                cephadm_module._apply_service_spec(nvmeof_spec_also_group1)
+            assert len(cephadm_module.spec_store.get_by_service_type('nvmeof')) == 1
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_validate_service_id_matches_group_on_apply(self, cephadm_module: CephadmOrchestrator):
+        matching_nvmeof_spec_group_service_id = NvmeofServiceSpec(
+            service_id='pool1.right_group',
+            group='right_group',
+            pool='pool1'
+        )
+        mismatch_nvmeof_spec_group_service_id = NvmeofServiceSpec(
+            service_id='pool2.wrong_group',
+            group='right_group',
+            pool='pool2'
+        )
+        matching_nvmeof_spec_group_service_id_with_dot = NvmeofServiceSpec(
+            service_id='pool3.right.group',
+            group='right.group',
+            pool='pool3'
+        )
+        mismatch_nvmeof_spec_group_service_id_with_dot = NvmeofServiceSpec(
+            service_id='pool4.wrong.group',
+            group='right.group',
+            pool='pool4'
+        )
+        with with_host(cephadm_module, 'test'):
+            cephadm_module._apply_service_spec(matching_nvmeof_spec_group_service_id)
+            with pytest.raises(
+                OrchestratorError,
+                match='The \'nvmeof\' service id/name must end with \'.<nvmeof-group-name>\'. Found '
+                      'group name \'right_group\' and service id \'pool2.wrong_group\''
+            ):
+                cephadm_module._apply_service_spec(mismatch_nvmeof_spec_group_service_id)
+            cephadm_module._apply_service_spec(matching_nvmeof_spec_group_service_id_with_dot)
+            with pytest.raises(
+                OrchestratorError,
+                match='The \'nvmeof\' service id/name must end with \'.<nvmeof-group-name>\'. Found '
+                      'group name \'right.group\' and service id \'pool4.wrong.group\''
+            ):
+                cephadm_module._apply_service_spec(mismatch_nvmeof_spec_group_service_id_with_dot)
+
 
 class TestMonitoring:
     def _get_config(self, url: str) -> str:
-- 
2.39.5
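
For context, a minimal sketch (not part of the patch) of the naming convention the new
validation enforces: an nvmeof spec's service id has to end with its group name, and each
group may back only one nvmeof service. The pool and group names below are made up;
NvmeofServiceSpec is constructed the same way the tests above construct it, and the apply-time
behavior described in the comments is the OrchestratorError raised by the new checks in
module.py.

    from ceph.deployment.service_spec import NvmeofServiceSpec

    # Accepted on apply: the service id ends with '.<group>' and no other
    # nvmeof spec in the cluster uses group 'groupA'.
    ok = NvmeofServiceSpec(service_id='mypool.groupA', pool='mypool', group='groupA')
    assert ok.service_id.endswith(ok.group)

    # Rejected on apply: the service id was copied from another spec but the
    # group parameter was left pointing at 'groupB', so the id no longer ends
    # with the group and _apply_service_spec() raises OrchestratorError.
    stale = NvmeofServiceSpec(service_id='mypool.groupA', pool='mypool', group='groupB')
    assert not stale.service_id.endswith(stale.group)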