From: Igor Golikov
Date: Thu, 7 Aug 2025 16:35:47 +0000 (+0000)
Subject: test: add subvolume metrics sanity test
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=1327fc8897c8779a8f31ea7f5e8759de2e048515;p=ceph.git

test: add subvolume metrics sanity test

Signed-off-by: Igor Golikov
Fixes: https://tracker.ceph.com/issues/68929
---

diff --git a/qa/suites/fs/volumes/tasks/volumes/test/metrics.yaml b/qa/suites/fs/volumes/tasks/volumes/test/metrics.yaml
new file mode 100644
index 0000000000000..78162fd25e702
--- /dev/null
+++ b/qa/suites/fs/volumes/tasks/volumes/test/metrics.yaml
@@ -0,0 +1,12 @@
+overrides:
+  install:
+    extra_system_packages:
+      rpm:
+        - fio
+      deb:
+        - fio
+tasks:
+  - cephfs_test_runner:
+      fail_on_skip: false
+      modules:
+        - tasks.cephfs.test_subvolume.TestSubvolumeMetrics
\ No newline at end of file
diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py
index ae3f2901ed3ed..35d7c630dff2a 100644
--- a/qa/tasks/cephfs/mount.py
+++ b/qa/tasks/cephfs/mount.py
@@ -1713,4 +1713,7 @@ class CephFSMountBase(object):
             path_to_mount = subvol_paths[mount_subvol_num]
         self.cephfs_mntpt = path_to_mount
 
+    def get_mount_point(self):
+        return self.hostfs_mntpt
+
 CephFSMount = CephFSMountBase
diff --git a/qa/tasks/cephfs/test_subvolume.py b/qa/tasks/cephfs/test_subvolume.py
index ed71ed6f43761..9a1755b21f4b8 100644
--- a/qa/tasks/cephfs/test_subvolume.py
+++ b/qa/tasks/cephfs/test_subvolume.py
@@ -1,8 +1,10 @@
 import logging
+import os
 from time import sleep
 
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from teuthology.exceptions import CommandFailedError
+from teuthology.contextutil import safe_while
 
 log = logging.getLogger(__name__)
 
@@ -245,3 +247,112 @@
         ino0 = self.fs.read_cache("/dir1/dir2", depth=0, rank=0)[0]
         self.assertFalse(ino0['is_auth'])
         self.assertTrue(ino0['is_subvolume'])
+
+class TestSubvolumeMetrics(CephFSTestCase):
+    CLIENTS_REQUIRED = 1
+    MDSS_REQUIRED = 1
+
+    def get_subvolume_metrics(self, mds_rank=0):
+        """
+        Helper to fetch the current subvolume metrics from the MDS counters via 'counter dump'.
+        """
+        mds_info = self.fs.get_rank(rank=mds_rank)
+        mds_name = mds_info['name']
+        counters = self.fs.mds_tell(["counter", "dump"], mds_id=mds_name)
+        return counters.get("mds_subvolume_metrics")
+
+    def test_subvolume_metrics_lifecycle(self):
+        """
+        Verify that subvolume metrics are initially absent, appear after I/O,
+        and disappear after the aggregation window expires.
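+
+        Metrics are read from the MDS 'counter dump' output (see
+        get_subvolume_metrics above); each entry is expected to carry
+        'labels' (fs_name, subvolume_path) and averaged I/O 'counters'.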
+ """ + subvol_name = "metrics_subv" + subv_path = "/volumes/_nogroup/metrics_subv" + + # no metrics initially + subvol_metrics = self.get_subvolume_metrics() + self.assertFalse(subvol_metrics, "Subvolume metrics should not be present before I/O") + + # create subvolume + self.fs.run_ceph_cmd('fs', 'subvolume', 'create', 'cephfs', subvol_name) + + # generate some I/O + mount_point = self.mount_a.get_mount_point() + suvolume_fs_path = self.fs.get_ceph_cmd_stdout('fs', 'subvolume', 'getpath', 'cephfs', subvol_name).strip() + suvolume_fs_path = os.path.join(mount_point, suvolume_fs_path.strip('/')) + + # do some writes + filename = os.path.join(suvolume_fs_path, "file0") + self.mount_a.run_shell_payload("sudo fio " + "--name test -rw=write " + "--bs=4k --numjobs=1 --time_based " + "--runtime=20s --verify=0 --size=1G " + f"--filename={filename}", wait=True) + + subvol_metrics = None + with safe_while(sleep=1, tries=30, action='wait for subvolume write counters') as proceed: + while proceed(): + # verify that metrics are available + subvol_metrics = self.get_subvolume_metrics() + if subvol_metrics: + break + + log.debug(f'verifying for write: subvol_metrics={subvol_metrics}') + + # Extract first metric entry + metric = subvol_metrics[0] + counters = metric["counters"] + labels = metric["labels"] + + # Label checks + self.assertEqual(labels["fs_name"], "cephfs", "Unexpected fs_name in subvolume metrics") + self.assertEqual(labels["subvolume_path"], subv_path, "Unexpected subvolume_path in subvolume metrics") + + # Counter presence and value checks + self.assertIn("avg_read_iops", counters) + self.assertIn("avg_read_tp_Bps", counters) + self.assertIn("avg_read_lat_msec", counters) + self.assertIn("avg_write_iops", counters) + self.assertIn("avg_write_tp_Bps", counters) + self.assertIn("avg_write_lat_msec", counters) + + # check write metrics + self.assertGreater(counters["avg_write_iops"], 0, "Expected avg_write_iops to be > 0") + self.assertGreater(counters["avg_write_tp_Bps"], 0, "Expected avg_write_tp_Bps to be > 0") + self.assertGreaterEqual(counters["avg_write_lat_msec"], 0, "Expected avg_write_lat_msec to be > 0") + + # do some reads + self.mount_a.run_shell_payload("sudo fio " + "--name test -rw=read " + "--bs=4k --numjobs=1 --time_based " + "--runtime=20s --verify=0 --size=1G " + f"--filename={filename}", wait=True) + + subvol_metrics = None + with safe_while(sleep=1, tries=30, action='wait for subvolume read counters') as proceed: + while proceed(): + # verify that metrics are available + subvol_metrics = self.get_subvolume_metrics() + if subvol_metrics: + break + + log.debug(f'verifying for read: subvol_metrics={subvol_metrics}') + + metric = subvol_metrics[0] + counters = metric["counters"] + + # Assert expected values (example: write I/O occurred, read did not) + self.assertGreater(counters["avg_read_iops"], 0, "Expected avg_read_iops to be >= 0") + self.assertGreater(counters["avg_read_tp_Bps"], 0, "Expected avg_read_tp_Bps to be >= 0") + self.assertGreaterEqual(counters["avg_read_lat_msec"], 0, "Expected avg_read_lat_msec to be >= 0") + + # wait for metrics to expire after inactivity + sleep(60) + + # verify that metrics are not present anymore + subvolume_metrics = self.get_subvolume_metrics() + self.assertFalse(subvolume_metrics, "Subvolume metrics should be gone after inactivity window")