]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/prometheus: add smb_metadata metric
authorAvan Thakkar <athakkar@redhat.com>
Thu, 10 Jul 2025 12:07:59 +0000 (17:37 +0530)
committerAvan Thakkar <athakkar@redhat.com>
Mon, 15 Sep 2025 06:36:45 +0000 (12:06 +0530)
Expose an SMB metadata metric with the following labels:
- CephFS volume, subvolume group and subvolume
- SMB cluster ID (as netbiosname)
- SMB version

Fixes: https://tracker.ceph.com/issues/72068
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
(cherry picked from commit e214717fea850ae2121fa566f60b696ec8ddd7a2)

src/pybind/mgr/prometheus/module.py

index a1c6644f1e26f6d8891c4a7c9f3930aacdc74969..4a6d7067852996b9c29b7d9d5052b3b27ae35cf3 100644 (file)
@@ -112,6 +112,9 @@ DISK_OCCUPATION = ('ceph_daemon', 'device', 'db_device',
 
 NUM_OBJECTS = ['degraded', 'misplaced', 'unfound']
 
+# Label names for the smb_metadata metric. get_smb_metadata() supplies the
+# label values in this same order; 'netbiosname' carries the share's cluster_id.
+SMB_METADATA = ('smb_version', 'volume',
+                'subvolume_group', 'subvolume', 'netbiosname')
+
 alert_metric = namedtuple('alert_metric', 'name description')
 HEALTH_CHECKS = [
     alert_metric('SLOW_OPS', 'OSD or Monitor requests taking a long time to process'),
@@ -794,6 +797,13 @@ class Module(MgrModule, OrchestratorClientMixin):
             ('type', 'ceph_daemon',)
         )
 
+        metrics['smb_metadata'] = Metric(
+            'untyped',
+            'smb_metadata',
+            'SMB Metadata',
+            SMB_METADATA
+        )
+
         for flag in OSD_FLAGS:
             path = 'osd_flag_{}'.format(flag)
             metrics[path] = Metric(
@@ -1748,6 +1758,66 @@ class Module(MgrModule, OrchestratorClientMixin):
                     self.metrics[path].set(value, labels)
         self.add_fixed_name_metrics()
 
+    # Populate the 'smb_metadata' metric: one sample with value 1 per SMB share
+    # resource reported by the 'smb show' command, labelled with the SMB
+    # version, the CephFS volume / subvolume group / subvolume backing the
+    # share, and the share's cluster_id.
+    #
+    # Returns None. Exceptions raised in the body are logged and not re-raised.
+    @profile_method()
+    def get_smb_metadata(self) -> None:
+        try:
+            # Nothing to report unless the 'smb' mgr module is listed as available.
+            mgr_map = self.get('mgr_map')
+            available_modules = [m['name'] for m in mgr_map['available_modules']]
+            if 'smb' not in available_modules:
+                self.log.debug("SMB module is not available, skipping SMB metadata collection")
+                return
+
+            # The daemon listing below is skipped when available() reports a
+            # falsy first element (presumably the orchestrator availability
+            # check from OrchestratorClientMixin -- confirm).
+            if not self.available()[0]:
+                self.log.debug("Orchestrator not available")
+                return
+
+            # Stays empty when no smb daemons are returned.
+            smb_version = ""
+
+            try:
+                daemons = raise_if_exception(self.list_daemons(daemon_type='smb'))
+                if daemons:
+                    # Only the first daemon's version is used; the same value is
+                    # applied to every share emitted below.
+                    smb_version = str(daemons[0].version)
+            except Exception as e:
+                # A failed daemon lookup aborts the whole collection: no
+                # smb_metadata samples are set on this call.
+                self.log.error(f"Failed to get SMB daemons: {str(e)}")
+                return
+
+            ret, out, err = self.mon_command({
+                'prefix': 'smb show',
+                'format': 'json'
+            })
+            if ret != 0:
+                self.log.error(f"Failed to get SMB info: {err}")
+                return
+
+            try:
+                smb_data = json.loads(out)
+
+                # Only 'ceph.smb.share' resources produce a sample; every other
+                # resource type in the output is ignored.
+                for resource in smb_data.get('resources', []):
+                    if resource.get('resource_type') == 'ceph.smb.share':
+                        # NOTE(review): emitted at info level once per share on
+                        # every call; collect() calls this method -- consider
+                        # whether debug level was intended.
+                        self.log.info("Processing SMB share resource")
+                        cluster_id = resource.get('cluster_id')
+                        if not cluster_id:
+                            self.log.debug("Skipping share with missing cluster_id")
+                            continue
+
+                        # Missing keys fall back to '' for volume and subvolume
+                        # and to '_nogroup' for the subvolume group.
+                        # NOTE(review): if 'cephfs' is present but null, the
+                        # .get() calls raise AttributeError, which the generic
+                        # handler below catches outside the loop, so remaining
+                        # shares would be skipped -- confirm whether the field
+                        # can be null.
+                        cephfs = resource.get('cephfs', {})
+                        cephfs_volume = cephfs.get('volume', '')
+                        cephfs_subvolumegroup = cephfs.get('subvolumegroup', '_nogroup')
+                        cephfs_subvolume = cephfs.get('subvolume', '')
+                        # Label values follow the order of SMB_METADATA:
+                        # smb_version, volume, subvolume_group, subvolume,
+                        # netbiosname (filled with cluster_id).
+                        self.metrics['smb_metadata'].set(1, (
+                            smb_version,
+                            cephfs_volume,
+                            cephfs_subvolumegroup,
+                            cephfs_subvolume,
+                            cluster_id
+                        ))
+            except json.JSONDecodeError:
+                self.log.error("Failed to decode SMB module output")
+            except Exception as e:
+                self.log.error(f"Error processing SMB metadata: {str(e)}")
+        except Exception as e:
+            # Catch-all for anything raised outside the inner try blocks
+            # (e.g. the mgr_map lookup or mon_command).
+            self.log.error(f"Failed to get SMB metadata: {str(e)}")
+
     @profile_method(True)
     def collect(self) -> str:
         # Clear the metrics before scraping
@@ -1767,6 +1837,7 @@ class Module(MgrModule, OrchestratorClientMixin):
         self.get_pool_repaired_objects()
         self.get_num_objects()
         self.get_all_daemon_health_metrics()
+        self.get_smb_metadata()
 
         if not self.get_module_option('exclude_perf_counters'):
             self.get_perf_counters()