mgr/cephadm: validate no duplicate groups and group vs. service id for nvmeof
author Adam King <adking@redhat.com>
Wed, 15 Jan 2025 21:42:28 +0000 (16:42 -0500)
committer Adam King <adking@redhat.com>
Wed, 5 Feb 2025 22:14:34 +0000 (17:14 -0500)
This came as a recommendation from the nvmeof team because

1) users shouldn't have multiple nvmeof services with the same group

and

2) some users may copy an existing nvmeof service spec and modify
the service id to point at another group but forget to update
the actual group parameter. At a glance this makes it look like
the two services are using different groups when in fact they are not.
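
For illustration (this snippet is not part of the patch, and the pool/group
names are made up), the mistake in 2) looks roughly like this:

    # Hypothetical sketch of the copy/paste mistake in 2). NvmeofServiceSpec
    # and its fields are real; the pool/group names are invented.
    from ceph.deployment.service_spec import NvmeofServiceSpec

    # Original service: the service id ends with its group name.
    original = NvmeofServiceSpec(
        service_id='mypool.group1', group='group1', pool='mypool')

    # Copied spec: the service id was edited to "point at" group2, but the
    # group parameter was left as group1. Applying this spec now raises
    # OrchestratorError because the service id doesn't end with the group
    # name; even with a matching id it would be rejected since group1 is
    # already used by nvmeof.mypool.group1.
    copied = NvmeofServiceSpec(
        service_id='mypool.group2', group='group1', pool='mypool')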

Usually I'd write a spec migration for this, but if we were to find
users had two specs with duplicate groups, or specs where the service
id doesn't match the group name, I don't think there's a clear course
of action to remedy the situation. Even just forcing the service id
of an nvmeof service to be pool_name.group_name could, I think, cause
a lot of confusion during an upgrade, so I'm hesitant to do it.

Signed-off-by: Adam King <adking@redhat.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/tests/test_services.py

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 080a1c3bf219f795a76b7e2e893d046d024a2d86..1efa1725c9ec5f2dc0ffee2933e79201f1a802af 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -33,12 +33,15 @@ from prettytable import PrettyTable
 from ceph.cephadm.images import DefaultImages
 from ceph.deployment import inventory
 from ceph.deployment.drive_group import DriveGroupSpec
-from ceph.deployment.service_spec import \
-    ServiceSpec, PlacementSpec, \
-    HostPlacementSpec, IngressSpec, \
-    TunedProfileSpec, \
-    MgmtGatewaySpec, \
-    NvmeofServiceSpec
+from ceph.deployment.service_spec import (
+    ServiceSpec,
+    PlacementSpec,
+    HostPlacementSpec,
+    IngressSpec,
+    TunedProfileSpec,
+    MgmtGatewaySpec,
+    NvmeofServiceSpec,
+)
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from cephadm.serve import CephadmServe
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
@@ -3397,13 +3400,24 @@ Then run the following:
                 raise OrchestratorError("The 'oauth2-proxy' service depends on the 'mgmt-gateway' service, but it is not configured.")
 
         if spec.service_type == 'nvmeof':
-            spec = cast(NvmeofServiceSpec, spec)
-            assert spec.pool is not None, "Pool cannot be None for nvmeof services"
+            nvmeof_spec = cast(NvmeofServiceSpec, spec)
+            assert nvmeof_spec.pool is not None, "Pool cannot be None for nvmeof services"
+            assert nvmeof_spec.service_id is not None  # for mypy
             try:
-                self._check_pool_exists(spec.pool, spec.service_name())
+                self._check_pool_exists(nvmeof_spec.pool, nvmeof_spec.service_name())
             except OrchestratorError as e:
                 self.log.debug(f"{e}")
                 raise
+            nvmeof_spec = cast(NvmeofServiceSpec, spec)
+            assert nvmeof_spec.service_id is not None  # for mypy
+            if nvmeof_spec.group and not nvmeof_spec.service_id.endswith(nvmeof_spec.group):
+                raise OrchestratorError("The 'nvmeof' service id/name must end with '.<nvmeof-group-name>'. Found "
+                                        f"group name '{nvmeof_spec.group}' and service id '{nvmeof_spec.service_id}'")
+            for sspec in [s.spec for s in self.spec_store.get_by_service_type('nvmeof')]:
+                nspec = cast(NvmeofServiceSpec, sspec)
+                if nvmeof_spec.group == nspec.group:
+                    raise OrchestratorError(f"Cannot create nvmeof service with group {nvmeof_spec.group}. That group is already "
+                                            f"being used by the service {nspec.service_name()}")
 
         if spec.placement.count is not None:
             if spec.service_type in ['mon', 'mgr']:
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index ebdbcc4991d04e81269343c9851c779b8ec3910a..4060f51e28b39055c32f5184bcf21c15fd826370 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -339,11 +339,11 @@ class TestNVMEOFService:
     @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
     def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
 
-        nvmeof_daemon_id = 'testpool.test.qwert'
         pool = 'testpool'
+        group = 'mygroup'
+        nvmeof_daemon_id = f'{pool}.{group}.test.qwert'
         tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288'
         default_port = 5500
-        group = 'mygroup'
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
         _get_name.return_value = nvmeof_daemon_id
 
@@ -426,7 +426,7 @@ tgt_cmd_extra_args = {tgt_cmd_extra_args}
 timeout = 1.0\n"""
 
         with with_host(cephadm_module, 'test'):
-            with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool,
+            with with_service(cephadm_module, NvmeofServiceSpec(service_id=f'{pool}.{group}',
                                                                 tgt_cmd_extra_args=tgt_cmd_extra_args,
                                                                 group=group,
                                                                 pool=pool)):
@@ -437,14 +437,14 @@ timeout = 1.0\n"""
                     [],
                     stdin=json.dumps({
                         "fsid": "fsid",
-                        "name": "nvmeof.testpool.test.qwert",
+                        "name": "nvmeof.testpool.mygroup.test.qwert",
                         "image": "",
                         "deploy_arguments": [],
                         "params": {
                             "tcp_ports": [5500, 4420, 8009, 10008]
                         },
                         "meta": {
-                            "service_name": "nvmeof.testpool",
+                            "service_name": "nvmeof.testpool.mygroup",
                             "ports": [5500, 4420, 8009, 10008],
                             "ip": None,
                             "deployed_by": [],
@@ -455,7 +455,7 @@ timeout = 1.0\n"""
                         },
                         "config_blobs": {
                             "config": "",
-                            "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n",
+                            "keyring": "[client.nvmeof.testpool.mygroup.test.qwert]\nkey = None\n",
                             "files": {
                                 "ceph-nvmeof.conf": nvmeof_gateway_conf
                             }
@@ -465,6 +465,70 @@ timeout = 1.0\n"""
                     use_current_daemon_image=False,
                 )
 
+    @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_validate_no_group_duplicate_on_apply(self, cephadm_module: CephadmOrchestrator):
+        nvmeof_spec_group1 = NvmeofServiceSpec(
+            service_id='testpool.testgroup',
+            group='testgroup',
+            pool='testpool'
+        )
+        nvmeof_spec_also_group1 = NvmeofServiceSpec(
+            service_id='testpool2.testgroup',
+            group='testgroup',
+            pool='testpool2'
+        )
+        with with_host(cephadm_module, 'test'):
+            out = cephadm_module._apply_service_spec(nvmeof_spec_group1)
+            assert out == 'Scheduled nvmeof.testpool.testgroup update...'
+            nvmeof_specs = cephadm_module.spec_store.get_by_service_type('nvmeof')
+            assert len(nvmeof_specs) == 1
+            assert nvmeof_specs[0].spec.service_name() == 'nvmeof.testpool.testgroup'
+            with pytest.raises(
+                OrchestratorError,
+                match='Cannot create nvmeof service with group testgroup. That group is already '
+                      'being used by the service nvmeof.testpool.testgroup'
+            ):
+                cephadm_module._apply_service_spec(nvmeof_spec_also_group1)
+            assert len(cephadm_module.spec_store.get_by_service_type('nvmeof')) == 1
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_validate_service_id_matches_group_on_apply(self, cephadm_module: CephadmOrchestrator):
+        matching_nvmeof_spec_group_service_id = NvmeofServiceSpec(
+            service_id='pool1.right_group',
+            group='right_group',
+            pool='pool1'
+        )
+        mismatch_nvmeof_spec_group_service_id = NvmeofServiceSpec(
+            service_id='pool2.wrong_group',
+            group='right_group',
+            pool='pool2'
+        )
+        matching_nvmeof_spec_group_service_id_with_dot = NvmeofServiceSpec(
+            service_id='pool3.right.group',
+            group='right.group',
+            pool='pool3'
+        )
+        mismatch_nvmeof_spec_group_service_id_with_dot = NvmeofServiceSpec(
+            service_id='pool4.wrong.group',
+            group='right.group',
+            pool='pool4'
+        )
+        with with_host(cephadm_module, 'test'):
+            cephadm_module._apply_service_spec(matching_nvmeof_spec_group_service_id)
+            with pytest.raises(
+                OrchestratorError,
+                match='The \'nvmeof\' service id/name must end with \'.<nvmeof-group-name>\'. Found '
+                      'group name \'right_group\' and service id \'pool2.wrong_group\''
+            ):
+                cephadm_module._apply_service_spec(mismatch_nvmeof_spec_group_service_id)
+            cephadm_module._apply_service_spec(matching_nvmeof_spec_group_service_id_with_dot)
+            with pytest.raises(
+                OrchestratorError,
+                match='The \'nvmeof\' service id/name must end with \'.<nvmeof-group-name>\'. Found '
+                      'group name \'right.group\' and service id \'pool4.wrong.group\''
+            ):
+                cephadm_module._apply_service_spec(mismatch_nvmeof_spec_group_service_id_with_dot)
+
 
 class TestMonitoring:
     def _get_config(self, url: str) -> str: