]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: Update the qa tests to be compatible with the new structure of 'perf stats' o/p.
authorNeeraj Pratap Singh <neesingh@redhat.com>
Mon, 6 Jun 2022 06:02:46 +0000 (11:32 +0530)
committerJos Collin <jcollin@redhat.com>
Thu, 1 Sep 2022 05:46:25 +0000 (11:16 +0530)
test_client_metrics_and_metadata and other tests have been
updated, as they were previously checking against the old structure
of the 'perf stats' output, which has been changed in this PR.

Fixes: https://tracker.ceph.com/issues/56162
Signed-off-by: Neeraj Pratap Singh <neesingh@redhat.com>
(cherry picked from commit ff33cccf33b70bd4d29ff1dc9e09c87f2ec17379)

Conflicts:
    qa/tasks/cephfs/test_mds_metrics.py
        Resolved cherry-pick conflicts.
        Edited the cherry-picked commit to drop the redundant version of 'test_client_metrics_and_metadata'.

qa/tasks/cephfs/test_mds_metrics.py

index 0d78e54faf0ae72fc362934f8bf3ebe4e637a633..2280cc13f69d79675c129299fb1688c4e1677c14 100644 (file)
@@ -31,23 +31,28 @@ class TestMDSMetrics(CephFSTestCase):
         if curr_max_mds > 1:
             self.fs.shrink(1)
 
-    def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[]):
+    def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[], mul_fs=[]):
         def verify_metrics_cbk(metrics):
             mds_metrics = metrics['metrics']
             if not len(mds_metrics) == active_mds_count + 1: # n active mdss + delayed set
                 return False
             fs_status = self.fs.status()
-            nonlocal ranks
+            nonlocal ranks, mul_fs
             if not ranks:
-                ranks = set([info['rank'] for info in fs_status.get_ranks(self.fs.id)])
+                if not mul_fs:
+                    mul_fs = [self.fs.id]
+                for filesystem in mul_fs:
+                    ranks = set([info['rank'] for info in fs_status.get_ranks(filesystem)])
             for rank in ranks:
                 r = mds_metrics.get("mds.{}".format(rank), None)
                 if not r or not len(mds_metrics['delayed_ranks']) == 0:
                     return False
-            global_metrics = metrics['global_metrics']
-            client_metadata = metrics['client_metadata']
-            if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count:
-                return False
+            for item in mul_fs:
+                key = fs_status.get_fsmap(item)['mdsmap']['fs_name']
+                global_metrics = metrics['global_metrics'].get(key, {})
+                client_metadata = metrics['client_metadata'].get(key, {})
+                if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count:
+                    return False
             return True
         return verify_metrics_cbk
 
@@ -102,12 +107,12 @@ class TestMDSMetrics(CephFSTestCase):
 
     def _setup_fs(self, fs_name):
         fs_a = self.mds_cluster.newfs(name=fs_name)
-        
+
         self.mds_cluster.mds_restart()
 
         # Wait for filesystem to go healthy
         fs_a.wait_for_daemons()
-        
+
         # Reconfigure client auth caps
         for mount in self.mounts:
             self.mds_cluster.mon_manager.raw_cluster_cmd_result(
@@ -326,7 +331,7 @@ class TestMDSMetrics(CephFSTestCase):
         log.debug("metrics={0}".format(metrics))
         self.assertTrue(valid)
 
-        client_matadata = metrics['client_metadata']
+        client_matadata = metrics['client_metadata'][self.fs.name]
         # pick an random client
         client = random.choice(list(client_matadata.keys()))
         # get IP of client to use in filter
@@ -338,8 +343,8 @@ class TestMDSMetrics(CephFSTestCase):
         self.assertTrue(valid)
 
         # verify IP from output with filter IP
-        for i in metrics['client_metadata']:
-            self.assertEqual(client_ip, metrics['client_metadata'][i]['IP'])
+        for i in metrics['client_metadata'][self.fs.name]:
+            self.assertEqual(client_ip, metrics['client_metadata'][self.fs.name][i]['IP'])
 
     def test_query_mds_and_client_filter(self):
         # validate
@@ -413,47 +418,6 @@ class TestMDSMetrics(CephFSTestCase):
         else:
             raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip")
     
-    def test_client_metrics_and_metadata(self):
-        self.mount_a.umount_wait()
-        self.mount_b.umount_wait()
-
-        self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
-            "enable_multiple", "true",
-            "--yes-i-really-mean-it")
-            
-        #creating filesystem
-        fs_a = self._setup_fs(fs_name = "fs1")
-
-        # Mount a client on fs_a
-        self.mount_a.mount_wait(cephfs_name=fs_a.name)
-        self.mount_a.write_n_mb("pad.bin", 1)
-        self.mount_a.write_n_mb("test.bin", 2)
-        self.mount_a.path_to_ino("test.bin")
-        self.mount_a.create_files()
-
-        #creating another filesystem
-        fs_b = self._setup_fs(fs_name = "fs2")
-
-        # Mount a client on fs_b
-        self.mount_b.mount_wait(cephfs_name=fs_b.name)
-        self.mount_b.write_n_mb("test.bin", 1)
-        self.mount_b.path_to_ino("test.bin")
-        self.mount_b.create_files()
-
-        # validate
-        valid, metrics = self._get_metrics(
-            self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
-        log.debug(f"metrics={metrics}")
-        self.assertTrue(valid)
-        
-        client_metadata = metrics['client_metadata']
-
-        for i in client_metadata:
-            if not (client_metadata[i]['hostname']):
-                raise RuntimeError("hostname not found!")
-            if not (client_metadata[i]['valid_metrics']):
-                raise RuntimeError("valid_metrics not found!")
-
     def test_perf_stats_stale_metrics(self):
         """
         That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover
@@ -464,21 +428,20 @@ class TestMDSMetrics(CephFSTestCase):
         log.debug(f'metrics={metrics}')
         self.assertTrue(valid)
 
-        #mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their
-        #entries from the global_metrics.
+        # mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their
+        # entries from the global_metrics.
         client_a_name = f'client.{self.mount_a.get_global_id()}'
         client_b_name = f'client.{self.mount_b.get_global_id()}'
 
         global_metrics = metrics['global_metrics']
-        client_a_metrics = global_metrics[client_a_name]
-        client_b_metrics = global_metrics[client_b_name]
+        client_a_metrics = global_metrics[self.fs.name][client_a_name]
+        client_b_metrics = global_metrics[self.fs.name][client_b_name]
 
-        #fail rank0 mds
+        # fail rank0 mds
         self.fs.rank_fail(rank=0)
 
-        # Wait for 10 seconds for the failover to complete and
-        # the mgr to get initial metrics from the new rank0 mds.
-        time.sleep(10)
+        # Wait for rank0 up:active state
+        self.fs.wait_for_state('up:active', rank=0, timeout=30)
 
         fscid = self.fs.id
 
@@ -498,15 +461,73 @@ class TestMDSMetrics(CephFSTestCase):
             log.debug(f'metrics={metrics_new}')
             self.assertTrue(valid)
 
-            global_metrics = metrics_new['global_metrics']
-            client_a_metrics_new = global_metrics[client_a_name]
-            client_b_metrics_new = global_metrics[client_b_name]
+            client_metadata = metrics_new['client_metadata']
+            client_a_metadata = client_metadata.get(self.fs.name, {}).get(client_a_name, {})
+            client_b_metadata = client_metadata.get(self.fs.name, {}).get(client_b_name, {})
 
-            #the metrics should be different for the test to succeed.
-            self.assertNotEqual(client_a_metrics, client_a_metrics_new)
-            self.assertNotEqual(client_b_metrics, client_b_metrics_new)
+            global_metrics = metrics_new['global_metrics']
+            client_a_metrics_new = global_metrics.get(self.fs.name, {}).get(client_a_name, {})
+            client_b_metrics_new = global_metrics.get(self.fs.name, {}).get(client_b_name, {})
+
+            # the metrics should be different for the test to succeed.
+            self.assertTrue(client_a_metadata and client_b_metadata and
+                            client_a_metrics_new and client_b_metrics_new and
+                            (client_a_metrics_new != client_a_metrics) and
+                            (client_b_metrics_new != client_b_metrics),
+                            "Invalid 'ceph fs perf stats' metrics after rank0 mds failover")
         except MaxWhileTries:
-            raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics")
+            raise RuntimeError("Failed to fetch 'ceph fs perf stats' metrics")
         finally:
             # cleanup test directories
             self._cleanup_test_dirs()
+
+    def test_client_metrics_and_metadata(self):
+        self.mount_a.umount_wait()
+        self.mount_b.umount_wait()
+        self.fs.delete_all_filesystems()
+
+        self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
+            "enable_multiple", "true", "--yes-i-really-mean-it")
+
+        # creating filesystem
+        fs_a = self._setup_fs(fs_name="fs1")
+
+        # Mount a client on fs_a
+        self.mount_a.mount_wait(cephfs_name=fs_a.name)
+        self.mount_a.write_n_mb("pad.bin", 1)
+        self.mount_a.write_n_mb("test.bin", 2)
+        self.mount_a.path_to_ino("test.bin")
+        self.mount_a.create_files()
+
+        # creating another filesystem
+        fs_b = self._setup_fs(fs_name="fs2")
+
+        # Mount a client on fs_b
+        self.mount_b.mount_wait(cephfs_name=fs_b.name)
+        self.mount_b.write_n_mb("test.bin", 1)
+        self.mount_b.path_to_ino("test.bin")
+        self.mount_b.create_files()
+
+        fscid_list = [fs_a.id, fs_b.id]
+
+        # validate
+        valid, metrics = self._get_metrics(
+            self.verify_mds_metrics(client_count=1, mul_fs=fscid_list), 30)
+        log.debug(f"metrics={metrics}")
+        self.assertTrue(valid)
+
+        client_metadata_a = metrics['client_metadata']['fs1']
+        client_metadata_b = metrics['client_metadata']['fs2']
+
+        for i in client_metadata_a:
+            if not (client_metadata_a[i]['hostname']):
+                raise RuntimeError("hostname of fs1 not found!")
+            if not (client_metadata_a[i]['valid_metrics']):
+                raise RuntimeError("valid_metrics of fs1 not found!")
+
+        for i in client_metadata_b:
+            if not (client_metadata_b[i]['hostname']):
+                raise RuntimeError("hostname of fs2 not found!")
+            if not (client_metadata_b[i]['valid_metrics']):
+                raise RuntimeError("valid_metrics of fs2 not found!")
+