SMB_METADATA = ('smb_version', 'volume',
'subvolume_group', 'subvolume', 'netbiosname', 'share')
+CEPHADM_DAEMON_STATUS = ('service_type', 'daemon_name', 'hostname', 'service_name')
+
alert_metric = namedtuple('alert_metric', 'name description')
HEALTH_CHECKS = [
alert_metric('SLOW_OPS', 'OSD or Monitor requests taking a long time to process'),
'SMB Metadata',
SMB_METADATA
)
+ metrics['cephadm_daemon_status'] = Metric(
+ 'gauge',
+ 'cephadm_daemon_status',
+ 'Status of cephadm daemons (0=stopped, 1=running, 2=errored)',
+ CEPHADM_DAEMON_STATUS
+ )
for flag in OSD_FLAGS:
path = 'osd_flag_{}'.format(flag)
(pool['pool_id'],)
)
+ @profile_method()
+ def set_cephadm_daemon_status_metrics(self) -> None:
+ try:
+ daemons = raise_if_exception(self.list_daemons())
+ for daemon in daemons:
+ service_type = getattr(daemon, 'daemon_type', '')
+ daemon_name = getattr(daemon, 'daemon_name', '')
+ hostname = str(getattr(daemon, 'hostname', ''))
+ status = getattr(daemon, 'status', '')
+ service_name_attr = getattr(daemon, 'service_name', '')
+ service_name = service_name_attr() if callable(service_name_attr) else str(service_name_attr)
+
+ self.metrics['cephadm_daemon_status'].set(
+ int(status),
+ (
+ service_type,
+ daemon_name,
+ hostname,
+ service_name,
+ )
+ )
+ except Exception as e:
+ self.log.error(f"Failed to collect cephadm daemon status: {e}")
+
@profile_method()
def get_df(self) -> None:
# maybe get the to-be-exported metrics from a config?
self.get_num_objects()
self.get_all_daemon_health_metrics()
self.get_smb_metadata()
+ self.set_cephadm_daemon_status_metrics()
if not self.get_module_option('exclude_perf_counters'):
self.get_perf_counters()