git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm/nvmeof: Add fields for prometheus frequency to NVMEOF spec file.
author Gil Bregman <gbregman@il.ibm.com>
Mon, 8 Sep 2025 16:29:46 +0000 (19:29 +0300)
committer Gil Bregman <gbregman@il.ibm.com>
Mon, 8 Sep 2025 18:48:14 +0000 (21:48 +0300)
Fixes: https://tracker.ceph.com/issues/72805
Signed-off-by: Gil Bregman <gbregman@il.ibm.com>
src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

index b2709c8687ff0e64ce7928572c67152308ce2c1c..8264894ceb662f7ab775c816468e3d6f7a6a67f7 100644 (file)
@@ -17,6 +17,8 @@ enable_prometheus_exporter = {{ spec.enable_prometheus_exporter }}
 prometheus_exporter_ssl = False
 prometheus_port = {{ spec.prometheus_port }}
 prometheus_stats_interval = {{ spec.prometheus_stats_interval }}
+prometheus_frequency_slow_down_factor = {{ spec.prometheus_frequency_slow_down_factor }}
+prometheus_cycles_to_adjust_speed = {{ spec.prometheus_cycles_to_adjust_speed }}
 prometheus_startup_delay = {{ spec.prometheus_startup_delay }}
 prometheus_connection_list_cache_expiration = {{ spec.prometheus_connection_list_cache_expiration }}
 verify_nqns = {{ spec.verify_nqns }}
@@ -32,6 +34,7 @@ omap_file_lock_duration = {{ spec.omap_file_lock_duration }}
 omap_file_lock_retries = {{ spec.omap_file_lock_retries }}
 omap_file_lock_retry_sleep_interval = {{ spec.omap_file_lock_retry_sleep_interval }}
 omap_file_update_reloads = {{ spec.omap_file_update_reloads }}
+omap_file_update_attempts = {{ spec.omap_file_update_attempts }}
 allowed_consecutive_spdk_ping_failures = {{ spec.allowed_consecutive_spdk_ping_failures }}
 spdk_ping_interval_in_seconds = {{ spec.spdk_ping_interval_in_seconds }}
 ping_spdk_under_lock = {{ spec.ping_spdk_under_lock }}
index 77242b4ae0d82b525b1a334d958ed193e977f4aa..3849bf4671e3f4f1f63d688dcf82ecc4ff186d67 100644 (file)
@@ -467,6 +467,8 @@ enable_prometheus_exporter = True
 prometheus_exporter_ssl = False
 prometheus_port = 10008
 prometheus_stats_interval = 10
+prometheus_frequency_slow_down_factor = 3.0
+prometheus_cycles_to_adjust_speed = 3
 prometheus_startup_delay = 240
 prometheus_connection_list_cache_expiration = 60
 verify_nqns = True
@@ -482,6 +484,7 @@ omap_file_lock_duration = 20
 omap_file_lock_retries = 30
 omap_file_lock_retry_sleep_interval = 1.0
 omap_file_update_reloads = 10
+omap_file_update_attempts = 500
 allowed_consecutive_spdk_ping_failures = 1
 spdk_ping_interval_in_seconds = 2.0
 ping_spdk_under_lock = False
index f24cfb982e46c4acd288a323ade50d44810b9b25..4176e083893562f8293b2936e5994b188d9a0b87 100644 (file)
@@ -1584,9 +1584,12 @@ class NvmeofServiceSpec(ServiceSpec):
                  omap_file_lock_retries: Optional[int] = 30,
                  omap_file_lock_retry_sleep_interval: Optional[float] = 1.0,
                  omap_file_update_reloads: Optional[int] = 10,
+                 omap_file_update_attempts: Optional[int] = 500,
                  enable_prometheus_exporter: Optional[bool] = True,
                  prometheus_port: Optional[int] = 10008,
                  prometheus_stats_interval: Optional[int] = 10,
+                 prometheus_frequency_slow_down_factor: Optional[float] = 3.0,
+                 prometheus_cycles_to_adjust_speed: Optional[int] = 3,
                  prometheus_startup_delay: Optional[int] = 240,
                  prometheus_connection_list_cache_expiration: Optional[int] = 60,
                  bdevs_per_cluster: Optional[int] = None,
@@ -1707,6 +1710,10 @@ class NvmeofServiceSpec(ServiceSpec):
         self.prometheus_port = prometheus_port or 10008
         #: ``prometheus_stats_interval`` Prometheus get stats interval
         self.prometheus_stats_interval = prometheus_stats_interval
+        #: ``prometheus_frequency_slow_down_factor`` Ratio between get stats and the interval
+        self.prometheus_frequency_slow_down_factor = prometheus_frequency_slow_down_factor
+        #: ``prometheus_cycles_to_adjust_speed`` Number of slow cycles before adjusting interval
+        self.prometheus_cycles_to_adjust_speed = prometheus_cycles_to_adjust_speed
         #: ``prometheus_startup_delay`` Prometheus startup delay, in seconds
         self.prometheus_startup_delay = prometheus_startup_delay
         #: ``prometheus_connection_list_cache_expiration`` Expiration time of connection list cache
@@ -1730,8 +1737,10 @@ class NvmeofServiceSpec(ServiceSpec):
         self.omap_file_lock_retries = omap_file_lock_retries
         #: ``omap_file_lock_retry_sleep_interval`` seconds to wait before retrying to lock OMAP
         self.omap_file_lock_retry_sleep_interval = omap_file_lock_retry_sleep_interval
-        #: ``omap_file_update_reloads`` number of attempt to reload OMAP when it differs from local
+        #: ``omap_file_update_reloads`` number of attempts to lock OMAP when it differs from local
         self.omap_file_update_reloads = omap_file_update_reloads
+        #: ``omap_file_update_attempts`` attempts to update local state when it differs from OMAP
+        self.omap_file_update_attempts = omap_file_update_attempts
         #: ``max_hosts_per_namespace`` max number of hosts per namespace
         self.max_hosts_per_namespace = max_hosts_per_namespace
         #: ``max_namespaces_with_netmask`` max number of namespaces which are not auto visible
@@ -1968,6 +1977,7 @@ class NvmeofServiceSpec(ServiceSpec):
                                    "OMAP file lock sleep interval")
         verify_non_negative_int(self.omap_file_lock_retries, "OMAP file lock retries")
         verify_non_negative_int(self.omap_file_update_reloads, "OMAP file reloads")
+        verify_non_negative_int(self.omap_file_update_attempts, "local state updates on reload")
         verify_non_negative_number(self.spdk_timeout, "SPDK timeout")
         verify_non_negative_int(self.max_log_file_size_in_mb, "Log file size")
         verify_non_negative_int(self.max_log_files_count, "Log files count")
@@ -1988,6 +1998,11 @@ class NvmeofServiceSpec(ServiceSpec):
         verify_boolean(self.abort_discovery_on_errors, "Abort discovery service on errors")
         verify_non_negative_int(self.prometheus_port, "Prometheus port")
         verify_non_negative_int(self.prometheus_stats_interval, "Prometheus stats interval")
+        verify_non_negative_number(self.prometheus_frequency_slow_down_factor,
+                                   "Prometheus stats interval factor")
+        verify_non_negative_int(self.prometheus_cycles_to_adjust_speed,
+                                "Prometheus count of slow cycles before adjusting")
+        verify_non_negative_int(self.prometheus_startup_delay, "Prometheus startup delay")
         verify_boolean(self.state_update_notify, "State update notify")
         verify_boolean(self.enable_spdk_discovery_controller, "Enable SPDK discovery controller")
         verify_boolean(self.enable_prometheus_exporter, "Enable Prometheus exporter")