mgr/cephadm: ceph nvmeof monitor support
author Alexander Indenbaum <aindenba@redhat.com>
Mon, 10 Jun 2024 13:04:57 +0000 (13:04 +0000)
committer Alexander Indenbaum <aindenba@redhat.com>
Wed, 31 Jul 2024 08:52:05 +0000 (08:52 +0000)
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
src/cephadm/cephadmlib/daemons/nvmeof.py
src/pybind/mgr/cephadm/services/nvmeof.py
src/pybind/mgr/cephadm/tests/test_services.py

index 7e8ab25163628462797ace42829e0f461cce080f..9b849497e0ec7f5dc57f1a26ec882621c319617a 100644
--- a/src/cephadm/cephadmlib/daemons/nvmeof.py
+++ b/src/cephadm/cephadmlib/daemons/nvmeof.py
@@ -63,7 +63,9 @@ class CephNvmeof(ContainerDaemonForm):
         return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
 
     @staticmethod
-    def _get_container_mounts(data_dir: str, log_dir: str) -> Dict[str, str]:
+    def _get_container_mounts(
+        data_dir: str, log_dir: str, mtls_dir: Optional[str] = None
+    ) -> Dict[str, str]:
         mounts = dict()
         mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
         mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
@@ -74,6 +76,8 @@ class CephNvmeof(ContainerDaemonForm):
         mounts['/dev/hugepages'] = '/dev/hugepages'
         mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
         mounts[log_dir] = '/var/log/ceph:z'
+        if mtls_dir:
+            mounts[mtls_dir] = '/src/mtls:z'
         return mounts
 
     def _get_tls_cert_key_mounts(
@@ -98,8 +102,15 @@ class CephNvmeof(ContainerDaemonForm):
     ) -> None:
         data_dir = self.identity.data_dir(ctx.data_dir)
         log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
-        mounts.update(self._get_container_mounts(data_dir, log_dir))
-        mounts.update(self._get_tls_cert_key_mounts(data_dir, self.files))
+        mtls_dir = os.path.join(ctx.data_dir, self.identity.fsid, 'mtls')
+        if os.path.exists(mtls_dir):
+            mounts.update(
+                self._get_container_mounts(
+                    data_dir, log_dir, mtls_dir=mtls_dir
+                )
+            )
+        else:
+            mounts.update(self._get_container_mounts(data_dir, log_dir))
 
     def customize_container_binds(
         self, ctx: CephadmContext, binds: List[List[str]]
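
For reference, a minimal sketch of the mount map this change produces when an mtls directory exists under the cluster's data dir (paths are illustrative, and the mount list is abbreviated to the entries visible in the hunk above):

# Sketch of CephNvmeof._get_container_mounts() with the new optional mtls_dir.
# Paths are hypothetical ('/var/lib/ceph' stands in for ctx.data_dir) and only
# the mounts shown in the hunk above are reproduced here.
import os

data_dir = '/var/lib/ceph/FSID/nvmeof.mygw.host1'  # per-daemon data dir
log_dir = '/var/log/ceph/FSID'
mtls_dir = '/var/lib/ceph/FSID/mtls'               # cluster-level mtls dir

mounts = {
    os.path.join(data_dir, 'config'): '/etc/ceph/ceph.conf:z',
    os.path.join(data_dir, 'keyring'): '/etc/ceph/keyring:z',
    '/dev/hugepages': '/dev/hugepages',
    '/dev/vfio/vfio': '/dev/vfio/vfio',
    log_dir: '/var/log/ceph:z',
}
if os.path.exists(mtls_dir):
    # new in this commit: expose mTLS material inside the gateway container
    mounts[mtls_dir] = '/src/mtls:z'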
index 9f9ba94557b3465464a10a440508fab351757e39..b37a2da1b157b6d1381b21232409b5c59459ab0b 100644
--- a/src/pybind/mgr/cephadm/services/nvmeof.py
+++ b/src/pybind/mgr/cephadm/services/nvmeof.py
@@ -7,7 +7,7 @@ from ipaddress import ip_address, IPv6Address
 from mgr_module import HandleCommandResult
 from ceph.deployment.service_spec import NvmeofServiceSpec
 
-from orchestrator import DaemonDescription, DaemonDescriptionStatus
+from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
 from .cephadmservice import CephadmDaemonDeploySpec, CephService
 from .. import utils
 
@@ -20,10 +20,16 @@ class NvmeofService(CephService):
 
     def config(self, spec: NvmeofServiceSpec) -> None:  # type: ignore
         assert self.TYPE == spec.service_type
-        assert spec.pool
-        self.pool = spec.pool
-        assert spec.group is not None
-        self.group = spec.group
+        # Looking at src/pybind/mgr/cephadm/services/iscsi.py
+        # asserting spec.pool/spec.group might be appropriate
+        if not spec.pool:
+            raise OrchestratorError("pool should be in the spec")
+        if spec.group is None:
+            raise OrchestratorError("group should be in the spec")
+        # unlike some other config funcs, if this fails we can't
+        # go forward deploying the daemon and then retry later. For
+        # that reason we make no attempt to catch the OrchestratorError
+        # this may raise
         self.mgr._check_pool_exists(spec.pool, spec.service_name())
 
     def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
@@ -80,34 +86,33 @@ class NvmeofService(CephService):
 
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
         daemon_spec.deps = []
-        if not hasattr(self, 'gws'):
-            self.gws = {} # id -> name map of gateways for this service.
-        self.gws[nvmeof_gw_id] = name # add to map of service's gateway names
         return daemon_spec
 
     def daemon_check_post(self, daemon_descrs: List[DaemonDescription]) -> None:
         """ Overrides the daemon_check_post to add nvmeof gateways safely
         """
         self.mgr.log.info(f"nvmeof daemon_check_post {daemon_descrs}")
-        # Assert configured
-        assert self.pool
-        assert self.group is not None
+        spec = cast(NvmeofServiceSpec,
+                    self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None))
+        if not spec:
+            self.mgr.log.error(f'Failed to find spec for {daemon_descrs[0].name()}')
+            return
+        pool = spec.pool
+        group = spec.group
         for dd in daemon_descrs:
-            self.mgr.log.info(f"nvmeof daemon_descr {dd}")
-            assert dd.daemon_id in self.gws
-            name = self.gws[dd.daemon_id]
-            self.mgr.log.info(f"nvmeof daemon name={name}")
             # Notify monitor about this gateway creation
             cmd = {
                 'prefix': 'nvme-gw create',
-                'id': name,
-                'group': self.group,
-                'pool': self.pool
+                'id': f'{utils.name_to_config_section("nvmeof")}.{dd.daemon_id}',
+                'group': group,
+                'pool': pool
             }
             self.mgr.log.info(f"create gateway: monitor command {cmd}")
             _, _, err = self.mgr.mon_command(cmd)
             if err:
-                self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")
+                err_msg = (f"Unable to send monitor command {cmd}, error {err}")
+                logger.error(err_msg)
+                raise OrchestratorError(err_msg)
         super().daemon_check_post(daemon_descrs)
 
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
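
The gateway identity sent to the monitor is now derived from the daemon id via utils.name_to_config_section() instead of the removed self.gws map, and pool/group come from the stored service spec. A minimal sketch of the resulting command, assuming name_to_config_section('nvmeof') resolves to 'client.nvmeof' and using hypothetical pool, group and daemon id values:

# Sketch of the 'nvme-gw create' monitor command built in daemon_check_post().
# All values are hypothetical; the 'client.nvmeof' prefix is an assumption
# about what utils.name_to_config_section('nvmeof') returns.
pool = 'mypool'                              # spec.pool, from spec_store
group = 'mygroup'                            # spec.group, from spec_store
daemon_id = 'mypool.mygroup.host1.abcdef'    # dd.daemon_id

cmd = {
    'prefix': 'nvme-gw create',
    'id': f'client.nvmeof.{daemon_id}',
    'group': group,
    'pool': pool,
}
# self.mgr.mon_command(cmd) returns (retcode, out, err); with this change a
# non-empty err is raised as OrchestratorError rather than only logged.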
@@ -119,8 +124,11 @@ class NvmeofService(CephService):
                         self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None))
 
             for dd in daemon_descrs:
-                assert dd.hostname is not None
                 service_name = dd.service_name()
+                if dd.hostname is None:
+                    err_msg = ('Trying to config_dashboard nvmeof but no hostname is defined')
+                    logger.error(err_msg)
+                    raise OrchestratorError(err_msg)
 
                 if not spec:
                     logger.warning(f'No ServiceSpec found for {service_name}')
@@ -182,41 +190,22 @@ class NvmeofService(CephService):
         if not ret:
             logger.info(f'{daemon.hostname} removed from nvmeof gateways dashboard config')
 
-        # Assert configured
-        assert self.pool
-        assert self.group is not None
-        assert daemon.daemon_id in self.gws
-        name = self.gws[daemon.daemon_id]
-        self.gws.pop(daemon.daemon_id)
+        spec = cast(NvmeofServiceSpec,
+                    self.mgr.spec_store.all_specs.get(daemon.service_name(), None))
+        if not spec:
+            self.mgr.log.error(f'Failed to find spec for {daemon.name()}')
+            return
+        pool = spec.pool
+        group = spec.group
+
         # Notify monitor about this gateway deletion
         cmd = {
             'prefix': 'nvme-gw delete',
-            'id': name,
-            'group': self.group,
-            'pool': self.pool
+            'id': f'{utils.name_to_config_section("nvmeof")}.{daemon.daemon_id}',
+            'group': group,
+            'pool': pool
         }
         self.mgr.log.info(f"delete gateway: monitor command {cmd}")
         _, _, err = self.mgr.mon_command(cmd)
         if err:
             self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")
-
-    def purge(self, service_name: str) -> None:
-        """Make sure no zombie gateway is left behind
-        """
-        # Assert configured
-        assert self.pool
-        assert self.group is not None
-        for daemon_id in self.gws:
-            name = self.gws[daemon_id]
-            self.gws.pop(daemon_id)
-            # Notify monitor about this gateway deletion
-            cmd = {
-                'prefix': 'nvme-gw delete',
-                'id': name,
-                'group': self.group,
-                'pool': self.pool
-            }
-            self.mgr.log.info(f"purge delete gateway: monitor command {cmd}")
-            _, _, err = self.mgr.mon_command(cmd)
-            if err:
-                self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")
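
With self.pool, self.group and the self.gws map gone, the purge() override has nothing left to iterate and is dropped; gateway teardown is handled per daemon in the removal path above, which resolves pool and group from the stored spec. A rough sketch of that lookup, where mgr and daemon are placeholders for self.mgr and the DaemonDescription being removed:

# Sketch: look up the service spec at removal time and notify the monitor.
# 'mgr' and 'daemon' are placeholders, not real module-level names.
from typing import cast
from ceph.deployment.service_spec import NvmeofServiceSpec

spec = cast(NvmeofServiceSpec,
            mgr.spec_store.all_specs.get(daemon.service_name(), None))
if not spec:
    mgr.log.error(f'Failed to find spec for {daemon.name()}')
else:
    cmd = {
        'prefix': 'nvme-gw delete',
        'id': f'client.nvmeof.{daemon.daemon_id}',  # same id scheme as create
        'group': spec.group,
        'pool': spec.pool,
    }
    _, _, err = mgr.mon_command(cmd)  # errors are logged, not raised, on delete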
index f733db7ab77344ada21e224e7e53aa5b59ce43e2..5d6eafcd9e41f3058e3e2d76489cd8728be2d584 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -405,7 +405,7 @@ omap_file_update_reloads = 10
 allowed_consecutive_spdk_ping_failures = 1
 spdk_ping_interval_in_seconds = 2.0
 ping_spdk_under_lock = False
-enable_monitor_client = False
+enable_monitor_client = True
 
 [gateway-logs]
 log_level = INFO