git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: Add some new fields to the cephadm NVMEoF spec file. 66987/head
author Gil Bregman <gbregman@il.ibm.com>
Mon, 19 Jan 2026 12:18:03 +0000 (14:18 +0200)
committer Gil Bregman <gbregman@il.ibm.com>
Tue, 20 Jan 2026 08:48:08 +0000 (10:48 +0200)
Fixes: https://tracker.ceph.com/issues/74446
Signed-off-by: Gil Bregman <gbregman@il.ibm.com>
(cherry picked from commit e872693c151842ea8d6142effe65e604acecf8b8)

src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py
src/python-common/ceph/deployment/utils.py

index b2709c8687ff0e64ce7928572c67152308ce2c1c..9b71000527186966529bd3c5a0e4bf656a659316 100644 (file)
@@ -25,6 +25,8 @@ verify_listener_ip = {{ spec.verify_listener_ip }}
 # This is a development flag, do not change it
 abort_on_errors = {{ spec.abort_on_errors }}
 # This is a development flag, do not change it
+abort_on_update_error = {{ spec.abort_on_update_error }}
+# This is a development flag, do not change it
 omap_file_ignore_unlock_errors = {{ spec.omap_file_ignore_unlock_errors }}
 # This is a development flag, do not change it
 omap_file_lock_on_read = {{ spec.omap_file_lock_on_read }}
@@ -45,6 +47,8 @@ max_namespaces_per_subsystem = {{ spec.max_namespaces_per_subsystem }}
 max_hosts_per_subsystem = {{ spec.max_hosts_per_subsystem }}
 subsystem_cache_expiration = {{ spec.subsystem_cache_expiration }}
 force_tls = {{ spec.force_tls }}
+# This is a development flag, do not change it
+max_message_length_in_mb = {{ spec.max_message_length_in_mb }}
 
 [gateway-logs]
 log_level = {{ spec.log_level }}
@@ -61,6 +65,8 @@ addr = {{ discovery_addr }}
 port = {{ spec.discovery_port }}
 # This is a development flag, do not change it
 abort_on_errors = {{ spec.abort_discovery_on_errors }}
+bind_retries_limit = {{ spec.discovery_bind_retries_limit }}
+bind_sleep_interval = {{ spec.discovery_bind_sleep_interval }}
 
 [ceph]
 pool = {{ spec.pool }}
index 6def6f8c6cee74720d95ab9694554141095b212f..7e3b1f2a383716853105f5470ef2098ba8949f90 100644 (file)
@@ -374,6 +374,8 @@ verify_listener_ip = True
 # This is a development flag, do not change it
 abort_on_errors = True
 # This is a development flag, do not change it
+abort_on_update_error = True
+# This is a development flag, do not change it
 omap_file_ignore_unlock_errors = False
 # This is a development flag, do not change it
 omap_file_lock_on_read = True
@@ -394,6 +396,8 @@ max_namespaces_per_subsystem = 512
 max_hosts_per_subsystem = 128
 subsystem_cache_expiration = 5
 force_tls = False
+# This is a development flag, do not change it
+max_message_length_in_mb = 4
 
 [gateway-logs]
 log_level = INFO
@@ -410,6 +414,8 @@ addr = 192.168.100.100
 port = 8009
 # This is a development flag, do not change it
 abort_on_errors = True
+bind_retries_limit = 10
+bind_sleep_interval = 0.5
 
 [ceph]
 pool = {pool}
index bde48657e65e83fe727b6c0bdff87ef2425f08dc..98dc900543372645a7e236b5a8def82cc999246a 100644 (file)
@@ -33,7 +33,7 @@ import yaml
 from ceph.deployment.hostspec import HostSpec, SpecValidationError, assert_valid_host
 from ceph.deployment.utils import unwrap_ipv6, valid_addr, verify_non_negative_int
 from ceph.deployment.utils import verify_positive_int, verify_non_negative_number
-from ceph.deployment.utils import verify_boolean, verify_enum
+from ceph.deployment.utils import verify_boolean, verify_enum, verify_int
 from ceph.utils import is_hex
 from ceph.smb import constants as smbconst
 
@@ -1392,6 +1392,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  max_gws_in_grp: Optional[int] = 16,
                  max_ns_to_change_lb_grp: Optional[int] = 8,
                  abort_on_errors: Optional[bool] = True,
+                 abort_on_update_error: Optional[bool] = True,
                  omap_file_ignore_unlock_errors: Optional[bool] = False,
                  omap_file_lock_on_read: Optional[bool] = True,
                  omap_file_lock_duration: Optional[int] = 20,
@@ -1421,6 +1422,7 @@ class NvmeofServiceSpec(ServiceSpec):
                  max_hosts_per_subsystem: Optional[int] = 128,
                  subsystem_cache_expiration: Optional[int] = 5,
                  force_tls: Optional[bool] = False,
+                 max_message_length_in_mb: Optional[int] = 4,
                  server_key: Optional[str] = None,
                  server_cert: Optional[str] = None,
                  client_key: Optional[str] = None,
@@ -1449,6 +1451,8 @@ class NvmeofServiceSpec(ServiceSpec):
                  discovery_addr: Optional[str] = None,
                  discovery_addr_map: Optional[Dict[str, str]] = None,
                  discovery_port: Optional[int] = None,
+                 discovery_bind_retries_limit: Optional[int] = 10,
+                 discovery_bind_sleep_interval: Optional[float] = 0.5,
                  abort_discovery_on_errors: Optional[bool] = True,
                  log_level: Optional[str] = 'INFO',
                  log_files_enabled: Optional[bool] = True,
@@ -1528,6 +1532,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.verify_listener_ip = verify_listener_ip
         #: ``abort_on_errors`` abort gateway in case of errors
         self.abort_on_errors = abort_on_errors
+        #: ``abort_on_update_error`` abort gateway in case of an error during update
+        self.abort_on_update_error = abort_on_update_error
         #: ``omap_file_ignore_unlock_errors`` ignore errors when unlocking the OMAP file
         self.omap_file_ignore_unlock_errors = omap_file_ignore_unlock_errors
         #: ``omap_file_lock_on_read`` lock omap when reading its content
@@ -1558,6 +1564,8 @@ class NvmeofServiceSpec(ServiceSpec):
         self.subsystem_cache_expiration = subsystem_cache_expiration
         #: ``force_tls`` force using TLS when adding hosts and listeners
         self.force_tls = force_tls
+        #: ``max_message_length_in_mb`` max protobuf message length, in mb
+        self.max_message_length_in_mb = max_message_length_in_mb
         #: ``allowed_consecutive_spdk_ping_failures`` # of ping failures before aborting gateway
         self.allowed_consecutive_spdk_ping_failures = allowed_consecutive_spdk_ping_failures
         #: ``spdk_ping_interval_in_seconds`` sleep interval in seconds between SPDK pings
@@ -1632,6 +1640,10 @@ class NvmeofServiceSpec(ServiceSpec):
         self.discovery_addr_map = discovery_addr_map
         #: ``discovery_port`` port of the discovery service
         self.discovery_port = discovery_port or 8009
+        #: ``discovery_bind_retries_limit`` how many times to retry binding the discovery port
+        self.discovery_bind_retries_limit = discovery_bind_retries_limit
+        #: ``discovery_bind_sleep_interval`` seconds to wait between each bind attempt
+        self.discovery_bind_sleep_interval = discovery_bind_sleep_interval
         #: ``abort_discovery_on_errors`` abort discovery service in case of errors
         self.abort_discovery_on_errors = abort_discovery_on_errors
         #: ``log_level`` the nvmeof gateway log level
@@ -1744,6 +1756,7 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_non_negative_int(self.max_ns_to_change_lb_grp,
                                 "Max namespaces to change load balancing group")
         verify_boolean(self.abort_on_errors, "Abort gateway on errors")
+        verify_boolean(self.abort_on_update_error, "Abort gateway on an update error")
         verify_boolean(self.omap_file_ignore_unlock_errors, "Ignore OMAP file unlock errors")
         verify_boolean(self.omap_file_lock_on_read, "Lock OMAP on read")
         verify_non_negative_int(self.omap_file_lock_duration, "OMAP file lock duration")
@@ -1765,9 +1778,13 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_non_negative_number(self.subsystem_cache_expiration,
                                    "Subsystem cache expiration period")
         verify_boolean(self.force_tls, "Force TLS")
+        verify_positive_int(self.max_message_length_in_mb, "Max protocol message length")
         verify_non_negative_number(self.monitor_timeout, "Monitor timeout")
         verify_non_negative_int(self.port, "Port")
         verify_non_negative_int(self.discovery_port, "Discovery port")
+        verify_int(self.discovery_bind_retries_limit, "Discovery port bind retries limit")
+        verify_non_negative_number(self.discovery_bind_sleep_interval,
+                                   "Sleep between discovery port bind retries")
         verify_boolean(self.abort_discovery_on_errors, "Abort discovery service on errors")
         verify_non_negative_int(self.prometheus_port, "Prometheus port")
         verify_non_negative_int(self.prometheus_stats_interval, "Prometheus stats interval")
index 758eddc9412409c27309d92b6347f1035e51c1a6..9cd942d4c821a644e78bfd1845b49022bcf307cb 100644 (file)
@@ -110,19 +110,27 @@ def verify_numeric(field: Any, field_name: str) -> None:
             raise SpecValidationError(f"{field_name} must be a number")
 
 
-def verify_non_negative_int(field: Any, field_name: str) -> None:
+def verify_int(field: Any, field_name: str) -> None:
     verify_numeric(field, field_name)
     if field is not None:
         if not isinstance(field, int) or isinstance(field, bool):
             raise SpecValidationError(f"{field_name} must be an integer")
+
+
+def verify_non_negative_int(field: Any, field_name: str) -> None:
+    verify_numeric(field, field_name)
+    if field is not None:
+        verify_int(field, field_name)
         if field < 0:
             raise SpecValidationError(f"{field_name} can't be negative")
 
 
 def verify_positive_int(field: Any, field_name: str) -> None:
     verify_non_negative_int(field, field_name)
-    if field is not None and field <= 0:
-        raise SpecValidationError(f"{field_name} must be greater than zero")
+    if field is not None:
+        verify_int(field, field_name)
+        if field <= 0:
+            raise SpecValidationError(f"{field_name} must be greater than zero")
 
 
 def verify_non_negative_number(field: Any, field_name: str) -> None: