From 2946b195edfc0b6a5cfcc661079e351601707ecb Mon Sep 17 00:00:00 2001
From: Alexander Indenbaum
Date: Mon, 10 Jun 2024 13:04:57 +0000
Subject: [PATCH] mgr/cephadm: ceph nvmeof monitor support

Signed-off-by: Alexander Indenbaum
---
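Notes (placed after the '---' fold, so git-am drops them from the commit
message):

* The generated nvmeof gateway config now sets enable_monitor_client =
  True (see the test_services.py hunk), and the mgr notifies the monitor
  with 'nvme-gw create' / 'nvme-gw delete' as gateways come and go.
* The mgr no longer caches an id -> name gateway map on the service
  object; the gateway id is recomputed from the daemon id, so it can be
  rebuilt after a module reload or mgr failover.

A minimal standalone sketch of that id derivation, assuming
utils.name_to_config_section('nvmeof') returns 'client.nvmeof' (the
'client.<daemon_type>' convention used for daemons such as iscsi and
rgw); the helper name below is hypothetical, for illustration only:

    # Illustration only -- not part of the patch.
    def gateway_monitor_id(daemon_id: str) -> str:
        # Assumption: utils.name_to_config_section("nvmeof") == "client.nvmeof"
        return f'client.nvmeof.{daemon_id}'

    # A daemon nvmeof.mypool.host1.xyzabc is registered with the monitor as:
    print(gateway_monitor_id('mypool.host1.xyzabc'))
    # -> client.nvmeof.mypool.host1.xyzabc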
For + # that reason we make no attempt to catch the OrchestratorError + # this may raise self.mgr._check_pool_exists(spec.pool, spec.service_name()) def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: @@ -80,34 +86,33 @@ class NvmeofService(CephService): daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) daemon_spec.deps = [] - if not hasattr(self, 'gws'): - self.gws = {} # id -> name map of gateways for this service. - self.gws[nvmeof_gw_id] = name # add to map of service's gateway names return daemon_spec def daemon_check_post(self, daemon_descrs: List[DaemonDescription]) -> None: """ Overrides the daemon_check_post to add nvmeof gateways safely """ self.mgr.log.info(f"nvmeof daemon_check_post {daemon_descrs}") - # Assert configured - assert self.pool - assert self.group is not None + spec = cast(NvmeofServiceSpec, + self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None)) + if not spec: + self.mgr.log.error(f'Failed to find spec for {daemon_descrs[0].name()}') + return + pool = spec.pool + group = spec.group for dd in daemon_descrs: - self.mgr.log.info(f"nvmeof daemon_descr {dd}") - assert dd.daemon_id in self.gws - name = self.gws[dd.daemon_id] - self.mgr.log.info(f"nvmeof daemon name={name}") # Notify monitor about this gateway creation cmd = { 'prefix': 'nvme-gw create', - 'id': name, - 'group': self.group, - 'pool': self.pool + 'id': f'{utils.name_to_config_section("nvmeof")}.{dd.daemon_id}', + 'group': group, + 'pool': pool } self.mgr.log.info(f"create gateway: monitor command {cmd}") _, _, err = self.mgr.mon_command(cmd) if err: - self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}") + err_msg = (f"Unable to send monitor command {cmd}, error {err}") + logger.error(err_msg) + raise OrchestratorError(err_msg) super().daemon_check_post(daemon_descrs) def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: @@ -119,8 +124,11 @@ class NvmeofService(CephService): self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None)) for dd in daemon_descrs: - assert dd.hostname is not None service_name = dd.service_name() + if dd.hostname is None: + err_msg = ('Trying to config_dashboard nvmeof but no hostname is defined') + logger.error(err_msg) + raise OrchestratorError(err_msg) if not spec: logger.warning(f'No ServiceSpec found for {service_name}') @@ -182,41 +190,22 @@ class NvmeofService(CephService): if not ret: logger.info(f'{daemon.hostname} removed from nvmeof gateways dashboard config') - # Assert configured - assert self.pool - assert self.group is not None - assert daemon.daemon_id in self.gws - name = self.gws[daemon.daemon_id] - self.gws.pop(daemon.daemon_id) + spec = cast(NvmeofServiceSpec, + self.mgr.spec_store.all_specs.get(daemon.service_name(), None)) + if not spec: + self.mgr.log.error(f'Failed to find spec for {daemon.name()}') + return + pool = spec.pool + group = spec.group + # Notify monitor about this gateway deletion cmd = { 'prefix': 'nvme-gw delete', - 'id': name, - 'group': self.group, - 'pool': self.pool + 'id': f'{utils.name_to_config_section("nvmeof")}.{daemon.daemon_id}', + 'group': group, + 'pool': pool } self.mgr.log.info(f"delete gateway: monitor command {cmd}") _, _, err = self.mgr.mon_command(cmd) if err: self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}") - - def purge(self, service_name: str) -> None: - """Make sure no zombie gateway is left behind - """ - # Assert configured - assert self.pool - assert 
-        for daemon_id in self.gws:
-            name = self.gws[daemon_id]
-            self.gws.pop(daemon_id)
-            # Notify monitor about this gateway deletion
-            cmd = {
-                'prefix': 'nvme-gw delete',
-                'id': name,
-                'group': self.group,
-                'pool': self.pool
-            }
-            self.mgr.log.info(f"purge delete gateway: monitor command {cmd}")
-            _, _, err = self.mgr.mon_command(cmd)
-            if err:
-                self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}")
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index f733db7ab7734..5d6eafcd9e41f 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -405,7 +405,7 @@ omap_file_update_reloads = 10
 allowed_consecutive_spdk_ping_failures = 1
 spdk_ping_interval_in_seconds = 2.0
 ping_spdk_under_lock = False
-enable_monitor_client = False
+enable_monitor_client = True
 
 [gateway-logs]
 log_level = INFO
-- 
2.39.5
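
A companion sketch for the cephadmlib hunk above (outside the diff, so
git-apply ignores it): cephadm bind-mounts <ctx.data_dir>/<fsid>/mtls
(typically /var/lib/ceph/<fsid>/mtls) at /src/mtls inside the gateway
container only when that directory exists on the host. The function
below inlines a reduced copy of the mount map to show the selection
logic; the fsid and paths are made-up examples, not real cluster state:

    import os

    def nvmeof_mounts(data_dir, log_dir, mtls_dir=None):
        # Reduced copy of CephNvmeof._get_container_mounts:
        # host path -> container path (':z' requests an SELinux relabel)
        mounts = {
            os.path.join(data_dir, 'config'): '/etc/ceph/ceph.conf:z',
            os.path.join(data_dir, 'keyring'): '/etc/ceph/keyring:z',
            log_dir: '/var/log/ceph:z',
        }
        if mtls_dir:  # optional mTLS material
            mounts[mtls_dir] = '/src/mtls:z'
        return mounts

    fsid = '00000000-0000-0000-0000-000000000000'  # example fsid
    mtls_dir = f'/var/lib/ceph/{fsid}/mtls'        # mounted only if present
    chosen = mtls_dir if os.path.exists(mtls_dir) else None
    print(nvmeof_mounts(f'/var/lib/ceph/{fsid}/nvmeof.a',
                        f'/var/log/ceph/{fsid}', chosen))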