From 1be67670264b3c780345c014d176c27e670bccdb Mon Sep 17 00:00:00 2001
From: Jos Collin
Date: Fri, 9 Jul 2021 16:56:47 +0530
Subject: [PATCH] qa: test `ceph fs perf stats` doesn't output stale metrics

That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover.

Fixes: https://tracker.ceph.com/issues/50033
Signed-off-by: Jos Collin
(cherry picked from commit 116e89a2f2849ed7cb711d1ae465c6f510b2810d)
---
 qa/tasks/cephfs/test_mds_metrics.py | 66 ++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py
index be680bb8600d5..4c85d9f251029 100644
--- a/qa/tasks/cephfs/test_mds_metrics.py
+++ b/qa/tasks/cephfs/test_mds_metrics.py
@@ -5,7 +5,7 @@ import random
 import logging
 import errno
 
-from teuthology.contextutil import safe_while
+from teuthology.contextutil import safe_while, MaxWhileTries
 from teuthology.exceptions import CommandFailedError
 
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
@@ -394,3 +394,67 @@ class TestMDSMetrics(CephFSTestCase):
                 raise
         else:
             raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip")
+
+    def test_perf_stats_stale_metrics(self):
+        """
+        That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover
+
+        Per-client global metrics are recorded, rank 0 is failed, fresh I/O is
+        issued and the metrics are fetched again; both clients' entries must
+        differ from their pre-failover values.
+        """
+        # validate
+        valid, metrics = self._get_metrics(self.verify_mds_metrics(
+            active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+        log.debug("metrics={0}".format(metrics))
+        self.assertTrue(valid)
+
+        global_metrics = metrics['global_metrics']
+
+        # TestMDSMetrics.CLIENTS_REQUIRED clients are mounted here. So they should be
+        # the first two entries in the global_metrics and won't be culled later on.
+        gm_keys_list = list(global_metrics.keys())
+        client1_metrics = global_metrics[gm_keys_list[0]]
+        client2_metrics = global_metrics[gm_keys_list[1]]
+
+        # fail rank0 mds
+        self.fs.rank_fail(rank=0)
+
+        # Wait for 10 seconds for the failover to complete and
+        # the mgr to get initial metrics from the new rank0 mds.
+        time.sleep(10)
+
+        fscid = self.fs.id
+
+        # spread directory per rank
+        self._spread_directory_on_all_ranks(fscid)
+
+        # From here on the cleanup must run even if the I/O step or the
+        # validation fails, so everything below sits under try/finally.
+        try:
+            # spread some I/O
+            self._do_spread_io_all_clients(fscid)
+
+            # wait a bit for mgr to get updated metrics
+            time.sleep(5)
+
+            # validate
+            try:
+                valid, metrics_new = self._get_metrics(self.verify_mds_metrics(
+                    active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+            except MaxWhileTries as e:
+                # keep the original exception chained for the traceback
+                raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics") from e
+            log.debug("metrics={0}".format(metrics_new))
+            self.assertTrue(valid)
+
+            global_metrics = metrics_new['global_metrics']
+            client1_metrics_new = global_metrics[gm_keys_list[0]]
+            client2_metrics_new = global_metrics[gm_keys_list[1]]
+
+            # the metrics should be different for the test to succeed.
+            self.assertNotEqual(client1_metrics, client1_metrics_new)
+            self.assertNotEqual(client2_metrics, client2_metrics_new)
+        finally:
+            # cleanup test directories
+            self._cleanup_test_dirs()
-- 
2.39.5