From 20eb836eda8cbd3955adb428295e131b89f95991 Mon Sep 17 00:00:00 2001 From: Gil Bregman Date: Wed, 9 Apr 2025 22:08:49 +0300 Subject: [PATCH] mgr/cephadm/nvmeof: Add OMAP read lock parameters to NVMEoF configuration Fixes https://tracker.ceph.com/issues/70861 Signed-off-by: Gil Bregman --- .../services/nvmeof/ceph-nvmeof.conf.j2 | 8 ++++++++ src/pybind/mgr/cephadm/tests/test_services.py | 10 +++++++++- .../ceph/deployment/service_spec.py | 18 +++++++++++++++++- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 index c540fd25cd29f..28369706edb0f 100644 --- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -19,6 +19,12 @@ prometheus_stats_interval = {{ spec.prometheus_stats_interval }} verify_nqns = {{ spec.verify_nqns }} verify_keys = {{ spec.verify_keys }} verify_listener_ip = {{ spec.verify_listener_ip }} +# This is a development flag, do not change it +abort_on_errors = {{ spec.abort_on_errors }} +# This is a development flag, do not change it +omap_file_ignore_unlock_errors = {{ spec.omap_file_ignore_unlock_errors }} +# This is a development flag, do not change it +omap_file_lock_on_read = {{ spec.omap_file_lock_on_read }} omap_file_lock_duration = {{ spec.omap_file_lock_duration }} omap_file_lock_retries = {{ spec.omap_file_lock_retries }} omap_file_lock_retry_sleep_interval = {{ spec.omap_file_lock_retry_sleep_interval }} @@ -48,6 +54,8 @@ log_directory = {{ spec.log_directory }} [discovery] addr = {{ discovery_addr }} port = {{ spec.discovery_port }} +# This is a development flag, do not change it +abort_on_errors = {{ spec.abort_discovery_on_errors }} [ceph] pool = {{ spec.pool }} diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 
054c20f9acca4..06a62b7d08ded 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -368,6 +368,12 @@ prometheus_stats_interval = 10 verify_nqns = True verify_keys = True verify_listener_ip = True +# This is a development flag, do not change it +abort_on_errors = True +# This is a development flag, do not change it +omap_file_ignore_unlock_errors = False +# This is a development flag, do not change it +omap_file_lock_on_read = True omap_file_lock_duration = 20 omap_file_lock_retries = 30 omap_file_lock_retry_sleep_interval = 1.0 @@ -380,7 +386,7 @@ max_hosts_per_namespace = 8 max_namespaces_with_netmask = 1000 max_subsystems = 128 max_hosts = 2048 -max_namespaces = 1024 +max_namespaces = 2048 max_namespaces_per_subsystem = 256 max_hosts_per_subsystem = 128 @@ -397,6 +403,8 @@ log_directory = /var/log/ceph/ [discovery] addr = 192.168.100.100 port = 8009 +# This is a development flag, do not change it +abort_on_errors = True [ceph] pool = {pool} diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 47b64bd69293a..7637741630af6 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -1380,6 +1380,9 @@ class NvmeofServiceSpec(ServiceSpec): rebalance_period_sec: Optional[int] = 7, max_gws_in_grp: Optional[int] = 16, max_ns_to_change_lb_grp: Optional[int] = 8, + abort_on_errors: Optional[bool] = True, + omap_file_ignore_unlock_errors: Optional[bool] = False, + omap_file_lock_on_read: Optional[bool] = True, omap_file_lock_duration: Optional[int] = 20, omap_file_lock_retries: Optional[int] = 30, omap_file_lock_retry_sleep_interval: Optional[float] = 1.0, @@ -1400,7 +1403,7 @@ class NvmeofServiceSpec(ServiceSpec): max_namespaces_with_netmask: Optional[int] = 1000, max_subsystems: Optional[int] = 128, max_hosts: Optional[int] = 2048, - max_namespaces: Optional[int] = 1024, + 
max_namespaces: Optional[int] = 2048, max_namespaces_per_subsystem: Optional[int] = 256, max_hosts_per_subsystem: Optional[int] = 128, server_key: Optional[str] = None, @@ -1428,6 +1431,7 @@ class NvmeofServiceSpec(ServiceSpec): discovery_addr: Optional[str] = None, discovery_addr_map: Optional[Dict[str, str]] = None, discovery_port: Optional[int] = None, + abort_discovery_on_errors: Optional[bool] = True, log_level: Optional[str] = 'INFO', log_files_enabled: Optional[bool] = True, log_files_rotation_enabled: Optional[bool] = True, @@ -1497,6 +1501,12 @@ class NvmeofServiceSpec(ServiceSpec): self.verify_keys = verify_keys #: ``verify_listener_ip`` enables verification of listener IP address self.verify_listener_ip = verify_listener_ip + #: ``abort_on_errors`` abort gateway in case of errors + self.abort_on_errors = abort_on_errors + #: ``omap_file_ignore_unlock_errors`` ignore errors when unlocking the OMAP file + self.omap_file_ignore_unlock_errors = omap_file_ignore_unlock_errors + #: ``omap_file_lock_on_read`` lock the OMAP file when reading its content + self.omap_file_lock_on_read = omap_file_lock_on_read #: ``omap_file_lock_duration`` number of seconds before automatically unlock OMAP file lock self.omap_file_lock_duration = omap_file_lock_duration #: ``omap_file_lock_retries`` number of retries to lock OMAP file before giving up @@ -1587,6 +1597,8 @@ class NvmeofServiceSpec(ServiceSpec): self.discovery_addr_map = discovery_addr_map #: ``discovery_port`` port of the discovery service self.discovery_port = discovery_port or 8009 + #: ``abort_discovery_on_errors`` abort discovery service in case of errors + self.abort_discovery_on_errors = abort_discovery_on_errors #: ``log_level`` the nvmeof gateway log level self.log_level = log_level or 'INFO' #: ``log_files_enabled`` enables the usage of files to keep the nameof gateway log @@ -1694,6 +1706,9 @@ class NvmeofServiceSpec(ServiceSpec): verify_non_negative_int(self.max_gws_in_grp, "Max gateways in group")
verify_non_negative_int(self.max_ns_to_change_lb_grp, "Max namespaces to change load balancing group") + verify_boolean(self.abort_on_errors, "Abort gateway on errors") + verify_boolean(self.omap_file_ignore_unlock_errors, "Ignore OMAP file unlock errors") + verify_boolean(self.omap_file_lock_on_read, "Lock OMAP on read") verify_non_negative_int(self.omap_file_lock_duration, "OMAP file lock duration") verify_non_negative_number(self.omap_file_lock_retry_sleep_interval, "OMAP file lock sleep interval") @@ -1713,6 +1728,7 @@ class NvmeofServiceSpec(ServiceSpec): verify_non_negative_number(self.monitor_timeout, "Monitor timeout") verify_non_negative_int(self.port, "Port") verify_non_negative_int(self.discovery_port, "Discovery port") + verify_boolean(self.abort_discovery_on_errors, "Abort discovery service on errors") verify_non_negative_int(self.prometheus_port, "Prometheus port") verify_non_negative_int(self.prometheus_stats_interval, "Prometheus stats interval") verify_boolean(self.state_update_notify, "State update notify") -- 2.39.5