qa: test `ceph fs perf stats` doesn't output stale metrics

author Jos Collin <jcollin@redhat.com>

Fri, 9 Jul 2021 11:26:47 +0000 (16:56 +0530)

committer Jos Collin <jcollin@redhat.com>

Tue, 19 Apr 2022 08:51:01 +0000 (14:21 +0530)
author Jos Collin <jcollin@redhat.com>
Fri, 9 Jul 2021 11:26:47 +0000 (16:56 +0530)
committer Jos Collin <jcollin@redhat.com>
Tue, 19 Apr 2022 08:51:01 +0000 (14:21 +0530)
diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py

index be680bb8600d580e78aa8f4456b18dd059b64ac6..4c85d9f25102950fa4d1d3a4d9bbbebb13060c9f 100644 (file)
--- a/qa/tasks/cephfs/test_mds_metrics.py
+++ b/qa/tasks/cephfs/test_mds_metrics.py
@@ -5,7 +5,7 @@ import random
  import logging
  import errno
  
-from teuthology.contextutil import safe_while
+from teuthology.contextutil import safe_while, MaxWhileTries
  from teuthology.exceptions import CommandFailedError
  from tasks.cephfs.cephfs_test_case import CephFSTestCase
  
@@ -394,3 +394,59 @@ class TestMDSMetrics(CephFSTestCase):
                  raise
          else:
              raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip")
+
+    def test_perf_stats_stale_metrics(self):
+        """
+        That `ceph fs perf stats` reports fresh (not stale) client metrics after a rank 0 MDS failover.
+        """
+        # fetch and validate the baseline metrics before the failover
+        valid, metrics = self._get_metrics(self.verify_mds_metrics(
+            active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+        log.debug("metrics={0}".format(metrics))
+        self.assertTrue(valid)
+
+        global_metrics = metrics['global_metrics']
+
+        # TestMDSMetrics.CLIENTS_REQUIRED clients are mounted here, so they should be
+        # the first two entries in global_metrics and should not be culled later on.
+        gm_keys_list = list(global_metrics.keys())
+        client1_metrics = global_metrics[gm_keys_list[0]]
+        client2_metrics = global_metrics[gm_keys_list[1]]
+
+        # fail the rank 0 MDS
+        self.fs.rank_fail(rank=0)
+
+        # Wait 10 seconds for the failover to complete and for
+        # the mgr to get initial metrics from the new rank 0 MDS.
+        time.sleep(10)
+
+        fscid = self.fs.id
+
+        # spread directory per rank
+        self._spread_directory_on_all_ranks(fscid)
+
+        # spread some I/O
+        self._do_spread_io_all_clients(fscid)
+
+        # wait a bit for mgr to get updated metrics
+        time.sleep(5)
+
+        # re-fetch the metrics and compare them with the pre-failover baseline
+        try:
+            valid, metrics_new = self._get_metrics(self.verify_mds_metrics(
+                active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+            log.debug("metrics={0}".format(metrics_new))
+            self.assertTrue(valid)
+
+            global_metrics = metrics_new['global_metrics']
+            client1_metrics_new = global_metrics[gm_keys_list[0]]
+            client2_metrics_new = global_metrics[gm_keys_list[1]]
+
+            # the metrics must differ from the baseline; equal values mean stale output
+            self.assertNotEqual(client1_metrics, client1_metrics_new)
+            self.assertNotEqual(client2_metrics, client2_metrics_new)
+        except MaxWhileTries:
+            raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics")
+        finally:
+            # cleanup test directories
+            self._cleanup_test_dirs()
author	Jos Collin <jcollin@redhat.com>
	Fri, 9 Jul 2021 11:26:47 +0000 (16:56 +0530)
committer	Jos Collin <jcollin@redhat.com>
	Tue, 19 Apr 2022 08:51:01 +0000 (14:21 +0530)