git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: Add some new fields to the cephadm NVMEoF spec file. 66968/head
author: Gil Bregman <gbregman@il.ibm.com>
Mon, 19 Jan 2026 12:18:03 +0000 (14:18 +0200)
committer: Gil Bregman <gbregman@il.ibm.com>
Mon, 19 Jan 2026 13:05:35 +0000 (15:05 +0200)
Fixes: https://tracker.ceph.com/issues/74446
Signed-off-by: Gil Bregman <gbregman@il.ibm.com>
src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py
src/python-common/ceph/deployment/utils.py

index 2a0293998f9a8e4b341a4db4046261d1169fefb9..2f9d6a84f865371dd305e42c93bb549230ac4fea 100644 (file)
@@ -27,6 +27,8 @@ verify_listener_ip = {{ spec.verify_listener_ip }}
 # This is a development flag, do not change it
 abort_on_errors = {{ spec.abort_on_errors }}
 # This is a development flag, do not change it
+abort_on_update_error = {{ spec.abort_on_update_error }}
+# This is a development flag, do not change it
 omap_file_ignore_unlock_errors = {{ spec.omap_file_ignore_unlock_errors }}
 # This is a development flag, do not change it
 omap_file_lock_on_read = {{ spec.omap_file_lock_on_read }}
@@ -48,6 +50,8 @@ max_namespaces_per_subsystem = {{ spec.max_namespaces_per_subsystem }}
 max_hosts_per_subsystem = {{ spec.max_hosts_per_subsystem }}
 subsystem_cache_expiration = {{ spec.subsystem_cache_expiration }}
 force_tls = {{ spec.force_tls }}
+# This is a development flag, do not change it
+max_message_length_in_mb = {{ spec.max_message_length_in_mb }}
 
 [gateway-logs]
 log_level = {{ spec.log_level }}
@@ -64,6 +68,8 @@ addr = {{ discovery_addr }}
 port = {{ spec.discovery_port }}
 # This is a development flag, do not change it
 abort_on_errors = {{ spec.abort_discovery_on_errors }}
+bind_retries_limit = {{ spec.discovery_bind_retries_limit }}
+bind_sleep_interval = {{ spec.discovery_bind_sleep_interval }}
 
 [ceph]
 pool = {{ spec.pool }}
index 864c4e1a6f558f26113e77daf9548ad6f7b47d3a..485f1e63fff107c27783f4d7bccfb1418783cba0 100644 (file)
@@ -477,6 +477,8 @@ verify_listener_ip = True
 # This is a development flag, do not change it
 abort_on_errors = True
 # This is a development flag, do not change it
+abort_on_update_error = True
+# This is a development flag, do not change it
 omap_file_ignore_unlock_errors = False
 # This is a development flag, do not change it
 omap_file_lock_on_read = True
@@ -498,6 +500,8 @@ max_namespaces_per_subsystem = 512
 max_hosts_per_subsystem = 128
 subsystem_cache_expiration = 30
 force_tls = False
+# This is a development flag, do not change it
+max_message_length_in_mb = 4
 
 [gateway-logs]
 log_level = INFO
@@ -514,6 +518,8 @@ addr = 192.168.100.100
 port = 8009
 # This is a development flag, do not change it
 abort_on_errors = True
+bind_retries_limit = 10
+bind_sleep_interval = 0.5
 
 [ceph]
 pool = {pool}
index 722b5848d2e25b0221be5f04047b58824eebe96a..932044741ca6b704fbe1ed9e6630b7a3616b05be 100644 (file)
@@ -36,7 +36,7 @@ import yaml
 from ceph.deployment.hostspec import HostSpec, SpecValidationError, assert_valid_host
 from ceph.deployment.utils import unwrap_ipv6, valid_addr, verify_non_negative_int
 from ceph.deployment.utils import verify_positive_int, verify_non_negative_number
-from ceph.deployment.utils import verify_boolean, verify_enum
+from ceph.deployment.utils import verify_boolean, verify_enum, verify_int
 from ceph.deployment.utils import parse_combined_pem_file
 from ceph.utils import is_hex
 from ceph.smb import constants as smbconst
@@ -1660,6 +1660,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  max_gws_in_grp: Optional[int] = 16,
                  max_ns_to_change_lb_grp: Optional[int] = 8,
                  abort_on_errors: Optional[bool] = True,
+                 abort_on_update_error: Optional[bool] = True,
                  omap_file_ignore_unlock_errors: Optional[bool] = False,
                  omap_file_lock_on_read: Optional[bool] = True,
                  omap_file_lock_duration: Optional[int] = 20,
@@ -1692,6 +1693,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  max_hosts_per_subsystem: Optional[int] = 128,
                  subsystem_cache_expiration: Optional[int] = 30,
                  force_tls: Optional[bool] = False,
+                 max_message_length_in_mb: Optional[int] = 4,
                  server_key: Optional[str] = None,
                  server_cert: Optional[str] = None,
                  client_key: Optional[str] = None,
@@ -1721,6 +1723,8 @@ class NvmeofServiceSpec(ServiceSpec):
                  discovery_addr: Optional[str] = None,
                  discovery_addr_map: Optional[Dict[str, str]] = None,
                  discovery_port: Optional[int] = None,
+                 discovery_bind_retries_limit: Optional[int] = 10,
+                 discovery_bind_sleep_interval: Optional[float] = 0.5,
                  abort_discovery_on_errors: Optional[bool] = True,
                  log_level: Optional[str] = 'INFO',
                  log_files_enabled: Optional[bool] = True,
@@ -1810,6 +1814,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.verify_listener_ip = verify_listener_ip
         #: ``abort_on_errors`` abort gateway in case of errors
         self.abort_on_errors = abort_on_errors
+        #: ``abort_on_update_error`` abort gateway in case of an error during update
+        self.abort_on_update_error = abort_on_update_error
         #: ``omap_file_ignore_unlock_errors`` ignore errors when unlocking the OMAP file
         self.omap_file_ignore_unlock_errors = omap_file_ignore_unlock_errors
         #: ``omap_file_lock_on_read`` lock omap when reading its content
@@ -1842,6 +1848,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.subsystem_cache_expiration = subsystem_cache_expiration
         #: ``force_tls`` force using TLS when adding hosts and listeners
         self.force_tls = force_tls
+        #: ``max_message_length_in_mb`` max protobuf message length, in mb
+        self.max_message_length_in_mb = max_message_length_in_mb
         #: ``allowed_consecutive_spdk_ping_failures`` # of ping failures before aborting gateway
         self.allowed_consecutive_spdk_ping_failures = allowed_consecutive_spdk_ping_failures
         #: ``spdk_ping_interval_in_seconds`` sleep interval in seconds between SPDK pings
@@ -1922,6 +1930,10 @@ class NvmeofServiceSpec(ServiceSpec):
         self.discovery_addr_map = discovery_addr_map
         #: ``discovery_port`` port of the discovery service
         self.discovery_port = discovery_port or 8009
+        #: ``discovery_bind_retries_limit`` how many times to retry binding the discovery port
+        self.discovery_bind_retries_limit = discovery_bind_retries_limit
+        #: ``discovery_bind_sleep_interval`` seconds to wait between each bind attempt
+        self.discovery_bind_sleep_interval = discovery_bind_sleep_interval
         #: ``abort_discovery_on_errors`` abort discovery service in case of errors
         self.abort_discovery_on_errors = abort_discovery_on_errors
         #: ``log_level`` the nvmeof gateway log level
@@ -2055,6 +2067,7 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_non_negative_int(self.max_ns_to_change_lb_grp,
                                 "Max namespaces to change load balancing group")
         verify_boolean(self.abort_on_errors, "Abort gateway on errors")
+        verify_boolean(self.abort_on_update_error, "Abort gateway on an update error")
         verify_boolean(self.omap_file_ignore_unlock_errors, "Ignore OMAP file unlock errors")
         verify_boolean(self.omap_file_lock_on_read, "Lock OMAP on read")
         verify_non_negative_int(self.omap_file_lock_duration, "OMAP file lock duration")
@@ -2081,9 +2094,13 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_non_negative_number(self.subsystem_cache_expiration,
                                    "Subsystem cache expiration period")
         verify_boolean(self.force_tls, "Force TLS")
+        verify_positive_int(self.max_message_length_in_mb, "Max protocol message length")
         verify_non_negative_number(self.monitor_timeout, "Monitor timeout")
         verify_non_negative_int(self.port, "Port")
         verify_non_negative_int(self.discovery_port, "Discovery port")
+        verify_int(self.discovery_bind_retries_limit, "Discovery port bind retries limit")
+        verify_non_negative_number(self.discovery_bind_sleep_interval,
+                                   "Sleep between discovery port bind retries")
         verify_boolean(self.abort_discovery_on_errors, "Abort discovery service on errors")
         verify_non_negative_int(self.prometheus_port, "Prometheus port")
         verify_non_negative_int(self.prometheus_stats_interval, "Prometheus stats interval")
index 0bc92b6df7ae230c9d9fd3eefc3a3b4afd6c7b9b..f28573b03fa53a327ce06410e63677cf9ea3b287 100644 (file)
@@ -129,19 +129,27 @@ def verify_numeric(field: Any, field_name: str) -> None:
             raise SpecValidationError(f"{field_name} must be a number")
 
 
-def verify_non_negative_int(field: Any, field_name: str) -> None:
+def verify_int(field: Any, field_name: str) -> None:
     verify_numeric(field, field_name)
     if field is not None:
         if not isinstance(field, int) or isinstance(field, bool):
             raise SpecValidationError(f"{field_name} must be an integer")
+
+
+def verify_non_negative_int(field: Any, field_name: str) -> None:
+    verify_numeric(field, field_name)
+    if field is not None:
+        verify_int(field, field_name)
         if field < 0:
             raise SpecValidationError(f"{field_name} can't be negative")
 
 
 def verify_positive_int(field: Any, field_name: str) -> None:
     verify_non_negative_int(field, field_name)
-    if field is not None and field <= 0:
-        raise SpecValidationError(f"{field_name} must be greater than zero")
+    if field is not None:
+        verify_int(field, field_name)
+        if field <= 0:
+            raise SpecValidationError(f"{field_name} must be greater than zero")
 
 
 def verify_non_negative_number(field: Any, field_name: str) -> None: