From 28ba9586d33d8168d4849e34b5380e275a7654b1 Mon Sep 17 00:00:00 2001 From: Jos Collin Date: Thu, 29 Feb 2024 16:20:03 +0530 Subject: [PATCH] qa: enhance labeled perf counters test for cephfs-mirror Implements checks for labeled perf counters in the appropriate tests. This patch verifies snaps_synced, snaps_renamed, snaps_deleted and sync_failures metrics are updated correctly based on the tests. Fixes: https://tracker.ceph.com/issues/64486 Signed-off-by: Jos Collin (cherry picked from commit 7b7e5d4221389ce077780450d78a4a1e6a3d0a43) Conflicts: qa/tasks/cephfs/test_mirroring.py Conflicted due to c7c38ba not backported --- qa/tasks/cephfs/test_mirroring.py | 111 +++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py index 7324dde1c6c13..f4a9766fefca9 100644 --- a/qa/tasks/cephfs/test_mirroring.py +++ b/qa/tasks/cephfs/test_mirroring.py @@ -544,6 +544,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + first = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -552,6 +556,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + second = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(second["counters"]["snaps_synced"], first["counters"]["snaps_synced"]) + # some more IO self.mount_a.run_shell(["mkdir", "d0/d00"]) self.mount_a.run_shell(["mkdir", "d0/d01"]) @@ -567,6 +576,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap1', 2) self.verify_snapshot('d0', 'snap1') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + third = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(third["counters"]["snaps_synced"], second["counters"]["snaps_synced"]) + # delete a snapshot self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) @@ -575,6 +589,10 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap0' not in snap_list) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fourth["counters"]["snaps_deleted"], third["counters"]["snaps_deleted"]) # rename a snapshot self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) @@ -585,6 +603,10 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap2' in snap_list) self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_renamed + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fifth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fifth["counters"]["snaps_renamed"], fourth["counters"]["snaps_renamed"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -623,6 +645,12 @@ class TestMirroring(CephFSTestCase): snap_list = self.mount_b.ls(path='d0/.snap') self.assertTrue('snap0' not in snap_list) + + # check sync_failures + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vmirror_peers["counters"]["sync_failures"], 0) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_restart_sync_on_blocklist(self): @@ -651,6 +679,10 @@ class TestMirroring(CephFSTestCase): # fetch rados address for blacklist check rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -679,6 +711,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -687,6 +723,10 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # add a non-existent directory for synchronization self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') @@ -703,6 +743,10 @@ class TestMirroring(CephFSTestCase): time.sleep(120) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_service_daemon_status(self): @@ -756,7 +800,7 @@ class TestMirroring(CephFSTestCase): self.disable_mirroring_module() # enable mirroring through mon interface -- this should result in the mirror daemon - # failing to enable mirroring due to absence of `cephfs_mirorr` index object. + # failing to enable mirroring due to absence of `cephfs_mirror` index object. self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: @@ -883,6 +927,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -891,6 +939,10 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -902,12 +954,20 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create snapshots in parent directories self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) @@ -919,12 +979,20 @@ class TestMirroring(CephFSTestCase): time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) time.sleep(15) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_deleted"], vthird["counters"]["snaps_deleted"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1027,6 +1095,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -1034,6 +1105,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(5, 20) @@ -1046,6 +1121,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) # diff again, this time back to HEAD log.debug('resetting to HEAD') @@ -1057,6 +1135,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3) self.verify_snapshot(repo_path, 'snap_c') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_synced"], vthird["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1126,11 +1207,18 @@ class TestMirroring(CephFSTestCase): while turns != len(typs): snapname = f'snap_{turns}' cleanup_and_create_with_type('d0', fnames) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}']) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', snapname, turns+1) verify_types('d0', fnames, snapname) + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + # next type typs.rotate(1) turns += 1 @@ -1176,6 +1264,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -1183,6 +1274,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(60, 100) @@ -1199,6 +1293,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1256,6 +1353,9 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] # take snapshots log.debug('taking snapshots') self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -1317,6 +1417,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d2', 'snap0', 1) self.verify_snapshot('d2', 'snap0') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + self.assertGreater(vafter["counters"]["snaps_deleted"], vbefore["counters"]["snaps_deleted"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1345,6 +1449,11 @@ class TestMirroring(CephFSTestCase): time.sleep(60) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/l1', 'snap0', 1) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + snaps_synced = vmirror_peers["counters"]["snaps_synced"] + self.assertEqual(snaps_synced, 1, f"Mismatch snaps_synced: {snaps_synced} vs 1") mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() -- 2.39.5