From bcf22b9044c43f87e3d934110560f63a29514a67 Mon Sep 17 00:00:00 2001 From: Redouane Kachach Date: Thu, 8 Feb 2024 08:25:37 +0100 Subject: [PATCH] backport mgr/prometheus: fix orch check to prevent Prometheus crash https://tracker.ceph.com/issues/64339 Signed-off-by: Redouane Kachach (cherry picked from commit de7da4a77ddbd33bbe050c52c4c5bbbb6ab1919f) --- src/pybind/mgr/prometheus/module.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 96f08f027a9ec..b92d8dc184f32 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -13,7 +13,7 @@ from collections import namedtuple from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand from mgr_util import get_default_addr, profile_method, build_url -from orchestrator import OrchestratorClientMixin, raise_if_exception, NoOrchestrator +from orchestrator import OrchestratorClientMixin, raise_if_exception, OrchestratorError from rbd import RBD from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable @@ -646,8 +646,6 @@ class Module(MgrModule, OrchestratorClientMixin): _global_instance = self self.metrics_thread = MetricCollectionThread(_global_instance) self.health_history = HealthHistory(self) - self.modify_instance_id = self.get_orch_status() and self.get_module_option( - 'exclude_perf_counters') def _setup_static_metrics(self) -> Dict[str, Metric]: metrics = {} @@ -864,10 +862,12 @@ class Module(MgrModule, OrchestratorClientMixin): return metrics - def get_orch_status(self) -> bool: + def orch_is_available(self) -> bool: try: return self.available()[0] - except NoOrchestrator: + except (RuntimeError, OrchestratorError, ImportError): + # import error could happen during startup in case + # orchestrator has not been loaded yet by the mgr return False 
def get_server_addr(self) -> str: @@ -1292,18 +1292,22 @@ class Module(MgrModule, OrchestratorClientMixin): # Populate other servers metadata # If orchestrator is available and ceph-exporter is running modify rgw instance id # to match the one from exporter - if self.modify_instance_id: + modify_instance_id = self.orch_is_available() and self.get_module_option('exclude_perf_counters') + if modify_instance_id: daemons = raise_if_exception(self.list_daemons(daemon_type='rgw')) for daemon in daemons: + if daemon.daemon_id and '.' in daemon.daemon_id: + instance_id = daemon.daemon_id.split(".")[2] + else: + instance_id = daemon.daemon_id if daemon.daemon_id else "" self.metrics['rgw_metadata'].set(1, - ('{}.{}'.format(str(daemon.daemon_type), - str(daemon.daemon_id)), + (f"{daemon.daemon_type}.{daemon.daemon_id}", str(daemon.hostname), str(daemon.version), - str(daemon.daemon_id).split(".")[2])) + instance_id)) for key, value in servers.items(): service_id, service_type = key - if service_type == 'rgw' and not self.modify_instance_id: + if service_type == 'rgw' and not modify_instance_id: hostname, version, name = value self.metrics['rgw_metadata'].set( 1, -- 2.39.5