From: Gil Bregman Date: Mon, 19 Jan 2026 12:18:03 +0000 (+0200) Subject: mgr/cephadm: Add some new fields to the cephadm NVMEoF spec file. X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a942bee07c3d4ab5a8509cd59dd27b8f8623aaf9;p=ceph.git mgr/cephadm: Add some new fields to the cephadm NVMEoF spec file. Fixes: https://tracker.ceph.com/issues/74446 Signed-off-by: Gil Bregman (cherry picked from commit e872693c151842ea8d6142effe65e604acecf8b8) --- diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 index b2709c8687f..9b710005271 100644 --- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -25,6 +25,8 @@ verify_listener_ip = {{ spec.verify_listener_ip }} # This is a development flag, do not change it abort_on_errors = {{ spec.abort_on_errors }} # This is a development flag, do not change it +abort_on_update_error = {{ spec.abort_on_update_error }} +# This is a development flag, do not change it omap_file_ignore_unlock_errors = {{ spec.omap_file_ignore_unlock_errors }} # This is a development flag, do not change it omap_file_lock_on_read = {{ spec.omap_file_lock_on_read }} @@ -45,6 +47,8 @@ max_namespaces_per_subsystem = {{ spec.max_namespaces_per_subsystem }} max_hosts_per_subsystem = {{ spec.max_hosts_per_subsystem }} subsystem_cache_expiration = {{ spec.subsystem_cache_expiration }} force_tls = {{ spec.force_tls }} +# This is a development flag, do not change it +max_message_length_in_mb = {{ spec.max_message_length_in_mb }} [gateway-logs] log_level = {{ spec.log_level }} @@ -61,6 +65,8 @@ addr = {{ discovery_addr }} port = {{ spec.discovery_port }} # This is a development flag, do not change it abort_on_errors = {{ spec.abort_discovery_on_errors }} +bind_retries_limit = {{ spec.discovery_bind_retries_limit }} +bind_sleep_interval = {{ 
spec.discovery_bind_sleep_interval }} [ceph] pool = {{ spec.pool }} diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 6def6f8c6ce..7e3b1f2a383 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -374,6 +374,8 @@ verify_listener_ip = True # This is a development flag, do not change it abort_on_errors = True # This is a development flag, do not change it +abort_on_update_error = True +# This is a development flag, do not change it omap_file_ignore_unlock_errors = False # This is a development flag, do not change it omap_file_lock_on_read = True @@ -394,6 +396,8 @@ max_namespaces_per_subsystem = 512 max_hosts_per_subsystem = 128 subsystem_cache_expiration = 5 force_tls = False +# This is a development flag, do not change it +max_message_length_in_mb = 4 [gateway-logs] log_level = INFO @@ -410,6 +414,8 @@ addr = 192.168.100.100 port = 8009 # This is a development flag, do not change it abort_on_errors = True +bind_retries_limit = 10 +bind_sleep_interval = 0.5 [ceph] pool = {pool} diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index bde48657e65..98dc9005433 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -33,7 +33,7 @@ import yaml from ceph.deployment.hostspec import HostSpec, SpecValidationError, assert_valid_host from ceph.deployment.utils import unwrap_ipv6, valid_addr, verify_non_negative_int from ceph.deployment.utils import verify_positive_int, verify_non_negative_number -from ceph.deployment.utils import verify_boolean, verify_enum +from ceph.deployment.utils import verify_boolean, verify_enum, verify_int from ceph.utils import is_hex from ceph.smb import constants as smbconst @@ -1392,6 +1392,7 @@ class NvmeofServiceSpec(ServiceSpec): max_gws_in_grp: Optional[int] = 16, max_ns_to_change_lb_grp: 
Optional[int] = 8, abort_on_errors: Optional[bool] = True, + abort_on_update_error: Optional[bool] = True, omap_file_ignore_unlock_errors: Optional[bool] = False, omap_file_lock_on_read: Optional[bool] = True, omap_file_lock_duration: Optional[int] = 20, @@ -1421,6 +1422,7 @@ class NvmeofServiceSpec(ServiceSpec): max_hosts_per_subsystem: Optional[int] = 128, subsystem_cache_expiration: Optional[int] = 5, force_tls: Optional[bool] = False, + max_message_length_in_mb: Optional[int] = 4, server_key: Optional[str] = None, server_cert: Optional[str] = None, client_key: Optional[str] = None, @@ -1449,6 +1451,8 @@ class NvmeofServiceSpec(ServiceSpec): discovery_addr: Optional[str] = None, discovery_addr_map: Optional[Dict[str, str]] = None, discovery_port: Optional[int] = None, + discovery_bind_retries_limit: Optional[int] = 10, + discovery_bind_sleep_interval: Optional[float] = 0.5, abort_discovery_on_errors: Optional[bool] = True, log_level: Optional[str] = 'INFO', log_files_enabled: Optional[bool] = True, @@ -1528,6 +1532,8 @@ class NvmeofServiceSpec(ServiceSpec): self.verify_listener_ip = verify_listener_ip #: ``abort_on_errors`` abort gateway in case of errors self.abort_on_errors = abort_on_errors + #: ``abort_on_update_error`` abort gateway in case of an error during update + self.abort_on_update_error = abort_on_update_error #: ``omap_file_ignore_unlock_errors`` ignore errors when unlocking the OMAP file self.omap_file_ignore_unlock_errors = omap_file_ignore_unlock_errors #: ``omap_file_lock_on_read`` lock omap when reading its content @@ -1558,6 +1564,8 @@ class NvmeofServiceSpec(ServiceSpec): self.subsystem_cache_expiration = subsystem_cache_expiration #: ``force_tls`` force using TLS when adding hosts and listeners self.force_tls = force_tls + #: ``max_message_length_in_mb`` max protobuf message length, in mb + self.max_message_length_in_mb = max_message_length_in_mb #: ``allowed_consecutive_spdk_ping_failures`` # of ping failures before aborting gateway 
self.allowed_consecutive_spdk_ping_failures = allowed_consecutive_spdk_ping_failures #: ``spdk_ping_interval_in_seconds`` sleep interval in seconds between SPDK pings @@ -1632,6 +1640,10 @@ class NvmeofServiceSpec(ServiceSpec): self.discovery_addr_map = discovery_addr_map #: ``discovery_port`` port of the discovery service self.discovery_port = discovery_port or 8009 + #: ``discovery_bind_retries_limit`` how many times to keep trying to bind the discovery port + self.discovery_bind_retries_limit = discovery_bind_retries_limit + #: ``discovery_bind_sleep_interval`` seconds to wait between each bind attempt + self.discovery_bind_sleep_interval = discovery_bind_sleep_interval #: ``abort_discovery_on_errors`` abort discovery service in case of errors self.abort_discovery_on_errors = abort_discovery_on_errors #: ``log_level`` the nvmeof gateway log level @@ -1744,6 +1756,7 @@ class NvmeofServiceSpec(ServiceSpec): verify_non_negative_int(self.max_ns_to_change_lb_grp, "Max namespaces to change load balancing group") verify_boolean(self.abort_on_errors, "Abort gateway on errors") + verify_boolean(self.abort_on_update_error, "Abort gateway on an update error") verify_boolean(self.omap_file_ignore_unlock_errors, "Ignore OMAP file unlock errors") verify_boolean(self.omap_file_lock_on_read, "Lock OMAP on read") verify_non_negative_int(self.omap_file_lock_duration, "OMAP file lock duration") @@ -1765,9 +1778,13 @@ class NvmeofServiceSpec(ServiceSpec): verify_non_negative_number(self.subsystem_cache_expiration, "Subsystem cache expiration period") verify_boolean(self.force_tls, "Force TLS") + verify_positive_int(self.max_message_length_in_mb, "Max protocol message length") verify_non_negative_number(self.monitor_timeout, "Monitor timeout") verify_non_negative_int(self.port, "Port") verify_non_negative_int(self.discovery_port, "Discovery port") + verify_int(self.discovery_bind_retries_limit, "Discovery port bind retries limit") + 
verify_non_negative_number(self.discovery_bind_sleep_interval, + "Sleep between discovery port bind retries") verify_boolean(self.abort_discovery_on_errors, "Abort discovery service on errors") verify_non_negative_int(self.prometheus_port, "Prometheus port") verify_non_negative_int(self.prometheus_stats_interval, "Prometheus stats interval") diff --git a/src/python-common/ceph/deployment/utils.py b/src/python-common/ceph/deployment/utils.py index 758eddc9412..9cd942d4c82 100644 --- a/src/python-common/ceph/deployment/utils.py +++ b/src/python-common/ceph/deployment/utils.py @@ -110,19 +110,27 @@ def verify_numeric(field: Any, field_name: str) -> None: raise SpecValidationError(f"{field_name} must be a number") -def verify_non_negative_int(field: Any, field_name: str) -> None: +def verify_int(field: Any, field_name: str) -> None: verify_numeric(field, field_name) if field is not None: if not isinstance(field, int) or isinstance(field, bool): raise SpecValidationError(f"{field_name} must be an integer") + + +def verify_non_negative_int(field: Any, field_name: str) -> None: + verify_numeric(field, field_name) + if field is not None: + verify_int(field, field_name) if field < 0: raise SpecValidationError(f"{field_name} can't be negative") def verify_positive_int(field: Any, field_name: str) -> None: verify_non_negative_int(field, field_name) - if field is not None and field <= 0: - raise SpecValidationError(f"{field_name} must be greater than zero") + if field is not None: + verify_int(field, field_name) + if field <= 0: + raise SpecValidationError(f"{field_name} must be greater than zero") def verify_non_negative_number(field: Any, field_name: str) -> None: