From ff33cccf33b70bd4d29ff1dc9e09c87f2ec17379 Mon Sep 17 00:00:00 2001 From: Neeraj Pratap Singh Date: Mon, 6 Jun 2022 11:32:46 +0530 Subject: [PATCH] qa: Update the qa tests to be compatible with the new structure of 'perf stats' o/p. test_client_metrics_and_metadata and other tests have been updated, as they were previously checking against the old structure of the perf stats output, which has been changed in this PR. Fixes: https://tracker.ceph.com/issues/56162 Signed-off-by: Neeraj Pratap Singh --- qa/tasks/cephfs/test_mds_metrics.py | 106 +++++++++++++++++----------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py index 727b80c6a91bf..f7cd89cafde08 100644 --- a/qa/tasks/cephfs/test_mds_metrics.py +++ b/qa/tasks/cephfs/test_mds_metrics.py @@ -31,23 +31,28 @@ class TestMDSMetrics(CephFSTestCase): if curr_max_mds > 1: self.fs.shrink(1) - def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[]): + def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[], mul_fs=[]): def verify_metrics_cbk(metrics): mds_metrics = metrics['metrics'] if not len(mds_metrics) == active_mds_count + 1: # n active mdss + delayed set return False fs_status = self.fs.status() - nonlocal ranks + nonlocal ranks, mul_fs if not ranks: - ranks = set([info['rank'] for info in fs_status.get_ranks(self.fs.id)]) + if not mul_fs: + mul_fs = [self.fs.id] + for filesystem in mul_fs: + ranks = set([info['rank'] for info in fs_status.get_ranks(filesystem)]) for rank in ranks: r = mds_metrics.get("mds.{}".format(rank), None) if not r or not len(mds_metrics['delayed_ranks']) == 0: return False - global_metrics = metrics['global_metrics'] - client_metadata = metrics['client_metadata'] - if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count: - return False + for item in mul_fs: + key = fs_status.get_fsmap(item)['mdsmap']['fs_name'] + global_metrics = 
metrics['global_metrics'].get(key, {}) + client_metadata = metrics['client_metadata'].get(key, {}) + if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count: + return False return True return verify_metrics_cbk @@ -102,12 +107,12 @@ class TestMDSMetrics(CephFSTestCase): def _setup_fs(self, fs_name): fs_a = self.mds_cluster.newfs(name=fs_name) - + self.mds_cluster.mds_restart() # Wait for filesystem to go healthy fs_a.wait_for_daemons() - + # Reconfigure client auth caps for mount in self.mounts: self.mds_cluster.mon_manager.raw_cluster_cmd_result( @@ -326,7 +331,7 @@ class TestMDSMetrics(CephFSTestCase): log.debug("metrics={0}".format(metrics)) self.assertTrue(valid) - client_matadata = metrics['client_metadata'] + client_matadata = metrics['client_metadata'][self.fs.name] # pick an random client client = random.choice(list(client_matadata.keys())) # get IP of client to use in filter @@ -338,8 +343,8 @@ class TestMDSMetrics(CephFSTestCase): self.assertTrue(valid) # verify IP from output with filter IP - for i in metrics['client_metadata']: - self.assertEqual(client_ip, metrics['client_metadata'][i]['IP']) + for i in metrics['client_metadata'][self.fs.name]: + self.assertEqual(client_ip, metrics['client_metadata'][self.fs.name][i]['IP']) def test_query_mds_and_client_filter(self): # validate @@ -423,21 +428,20 @@ class TestMDSMetrics(CephFSTestCase): log.debug(f'metrics={metrics}') self.assertTrue(valid) - #mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their - #entries from the global_metrics. + # mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their + # entries from the global_metrics. 
client_a_name = f'client.{self.mount_a.get_global_id()}' client_b_name = f'client.{self.mount_b.get_global_id()}' global_metrics = metrics['global_metrics'] - client_a_metrics = global_metrics[client_a_name] - client_b_metrics = global_metrics[client_b_name] + client_a_metrics = global_metrics[self.fs.name][client_a_name] + client_b_metrics = global_metrics[self.fs.name][client_b_name] - #fail rank0 mds + # fail rank0 mds self.fs.rank_fail(rank=0) - # Wait for 10 seconds for the failover to complete and - # the mgr to get initial metrics from the new rank0 mds. - time.sleep(10) + # Wait for rank0 up:active state + self.fs.wait_for_state('up:active', rank=0, timeout=30) fscid = self.fs.id @@ -457,15 +461,22 @@ class TestMDSMetrics(CephFSTestCase): log.debug(f'metrics={metrics_new}') self.assertTrue(valid) - global_metrics = metrics_new['global_metrics'] - client_a_metrics_new = global_metrics[client_a_name] - client_b_metrics_new = global_metrics[client_b_name] + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metadata = client_metadata.get(self.fs.name, {}).get(client_b_name, {}) - #the metrics should be different for the test to succeed. - self.assertNotEqual(client_a_metrics, client_a_metrics_new) - self.assertNotEqual(client_b_metrics, client_b_metrics_new) + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metrics_new = global_metrics.get(self.fs.name, {}).get(client_b_name, {}) + + # the metrics should be different for the test to succeed. 
+ self.assertTrue(client_a_metadata and client_b_metadata and + client_a_metrics_new and client_b_metrics_new and + (client_a_metrics_new != client_a_metrics) and + (client_b_metrics_new != client_b_metrics), + "Invalid 'ceph fs perf stats' metrics after rank0 mds failover") except MaxWhileTries: - raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics") + raise RuntimeError("Failed to fetch 'ceph fs perf stats' metrics") finally: # cleanup test directories self._cleanup_test_dirs() @@ -473,13 +484,13 @@ class TestMDSMetrics(CephFSTestCase): def test_client_metrics_and_metadata(self): self.mount_a.umount_wait() self.mount_b.umount_wait() + self.fs.delete_all_filesystems() self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", - "--yes-i-really-mean-it") - - #creating filesystem - fs_a = self._setup_fs(fs_name = "fs1") + "enable_multiple", "true", "--yes-i-really-mean-it") + + # creating filesystem + fs_a = self._setup_fs(fs_name="fs1") # Mount a client on fs_a self.mount_a.mount_wait(cephfs_name=fs_a.name) @@ -488,8 +499,8 @@ class TestMDSMetrics(CephFSTestCase): self.mount_a.path_to_ino("test.bin") self.mount_a.create_files() - #creating another filesystem - fs_b = self._setup_fs(fs_name = "fs2") + # creating another filesystem + fs_b = self._setup_fs(fs_name="fs2") # Mount a client on fs_b self.mount_b.mount_wait(cephfs_name=fs_b.name) @@ -497,17 +508,26 @@ class TestMDSMetrics(CephFSTestCase): self.mount_b.path_to_ino("test.bin") self.mount_b.create_files() + fscid_list = [fs_a.id, fs_b.id] + # validate valid, metrics = self._get_metrics( - self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + self.verify_mds_metrics(client_count=1, mul_fs=fscid_list), 30) log.debug(f"metrics={metrics}") self.assertTrue(valid) - - client_metadata = metrics['client_metadata'] - - for i in client_metadata: - if not (client_metadata[i]['hostname']): - raise RuntimeError("hostname not found!") - if not 
(client_metadata[i]['valid_metrics']): - raise RuntimeError("valid_metrics not found!") + + client_metadata_a = metrics['client_metadata']['fs1'] + client_metadata_b = metrics['client_metadata']['fs2'] + + for i in client_metadata_a: + if not (client_metadata_a[i]['hostname']): + raise RuntimeError("hostname of fs1 not found!") + if not (client_metadata_a[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs1 not found!") + + for i in client_metadata_b: + if not (client_metadata_b[i]['hostname']): + raise RuntimeError("hostname of fs2 not found!") + if not (client_metadata_b[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs2 not found!") -- 2.39.5